diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard')
985 files changed, 0 insertions, 9636 deletions
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt deleted file mode 100644 index e1ebaee3..00000000 --- a/inc/3rdparty/site_config/standard/.about.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | body: //div[@id='articlebody'] | ||
2 | title: //h1 | ||
3 | author: //p[@id='by']//a | ||
4 | |||
5 | next_page_link: //span[@class='next']/a | ||
6 | # Not the same as below! | ||
7 | |||
8 | prune: yes | ||
9 | tidy: no | ||
10 | |||
11 | # Annoying 'next' links plainly inside the article body | ||
12 | strip: //*[text()[contains(.,'Next: ')]] | ||
13 | |||
14 | test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm | ||
diff --git a/inc/3rdparty/site_config/standard/.wikipedia.org.txt b/inc/3rdparty/site_config/standard/.wikipedia.org.txt deleted file mode 100644 index 8b98ae4b..00000000 --- a/inc/3rdparty/site_config/standard/.wikipedia.org.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //h1[@id='firstHeading'] | ||
2 | body: //div[@id = 'bodyContent'] | ||
3 | strip_id_or_class: editsection | ||
4 | #strip_id_or_class: toc | ||
5 | strip_id_or_class: vertical-navbox | ||
6 | strip: //table[@id='toc'] | ||
7 | strip: //div[@id='catlinks'] | ||
8 | strip: //div[@id='jump-to-nav'] | ||
9 | strip: //div[@class='thumbcaption']//div[@class='magnify'] | ||
10 | strip: //table[@class='navbox'] | ||
11 | strip: //table[contains(@class, 'infobox')] | ||
12 | strip: //div[@class='dablink'] | ||
13 | strip: //div[@id='contentSub'] | ||
14 | strip: //table[contains(@class, 'metadata')] | ||
15 | strip: //*[contains(@class, 'noprint')] | ||
16 | strip: //span[@title='pronunciation:'] | ||
17 | prune: no | ||
18 | tidy: no | ||
19 | test_url: http://en.wikipedia.org/wiki/Christopher_Lloyd \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt deleted file mode 100755 index 86c9e077..00000000 --- a/inc/3rdparty/site_config/standard/24ways.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='meta']/h2/a | ||
2 | author: //div[@class='meta']/h2/following-sibling::p/a/text() | ||
3 | date://div[@class='meta']/h2/strong | ||
4 | body: //div[@id='article'] | ||
5 | strip: //div[@class='domore'] | ||
6 | test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt deleted file mode 100755 index d73d7de5..00000000 --- a/inc/3rdparty/site_config/standard/36kr.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'entry-title')] | ||
2 | date: //meta[@name='weibo: article:create_at']/@content | ||
3 | body: //div[contains(@class, 'mainContent')] | ||
4 | strip_id_or_class: related_topics | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.36kr.com/p/207879.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt deleted file mode 100755 index 531cac1e..00000000 --- a/inc/3rdparty/site_config/standard/37signals.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='post_header']//h2/a | ||
2 | author: //span[@class='author'] | ||
3 | date: //span[@class='date'] | ||
4 | body: //div[@id='Content'] | ||
5 | |||
6 | test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt deleted file mode 100755 index 80a3958f..00000000 --- a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@class='content'] | ||
2 | date: //div[@class='content']/h2 | ||
3 | strip: //div[@class='content']/h2 | ||
4 | title: //div[@class='content']/h3 | ||
5 | |||
6 | strip: //div[@id='postmenu'] | ||
7 | strip: //div[@class='trackback'] | ||
8 | tidy: no | ||
9 | test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt deleted file mode 100755 index b846b050..00000000 --- a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@id='main'] | ||
2 | title: //div[@class='intro']/h1 | ||
3 | author: //ul[@class='text-data']/li[@class='author'] | ||
4 | date: //ul[@class='text-data']/li[@class='date'] | ||
5 | convert_double_br_tags: yes | ||
6 | tidy: no | ||
7 | |||
8 | strip: //div[@class='share'] | ||
9 | strip: //*[@class='zoom'] | ||
10 | strip: //div[@id='disqus_thread'] | ||
11 | test_url: http://3voor12.vpro.nl/nieuws/2012/januari/Ook-website-GroenLinks-woensdag-op-zwart-i-v-m--SOPA.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt deleted file mode 100755 index 3777c66f..00000000 --- a/inc/3rdparty/site_config/standard/43folders.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //*[@class = 'content'] | ||
2 | author: //*[@class = 'submitted']/a | ||
3 | date: substring-after(//*[@class = 'submitted']/text(), '|') | ||
4 | test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt deleted file mode 100755 index b9b7e9dd..00000000 --- a/inc/3rdparty/site_config/standard/500px.com.txt +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | # very loose setup for both 500px.com/photo/* and 500px.com/blog/* | ||
2 | # photo page example: http://500px.com/photo/4181666 | ||
3 | # blog page example: http://500px.com/blog/110 | ||
4 | |||
5 | # avoid "no text" error | ||
6 | tidy:no | ||
7 | prune:no | ||
8 | |||
9 | # reorganize photo page elements | ||
10 | #body://div[contains(@class,'container')] | ||
11 | move_into(body)://div[contains(@id,'thephoto')] | ||
12 | move_into(body)://div[contains(@id,'description')] | ||
13 | move_into(body)://div[contains(@id,'tags')] | ||
14 | move_into(body)://div[contains(@id,'photo-info')] | ||
15 | |||
16 | # clean photo page info | ||
17 | strip://span[contains(@id,'copyright')] | ||
18 | strip://*[contains(@id,'store')] | ||
19 | strip://*[contains(@id,'user-info')] | ||
20 | strip://*[contains(@id,'photo-stats')] | ||
21 | strip://*[contains(@id,'voting_controls_container')] | ||
22 | strip://*[contains(@id,'more-photos')] | ||
23 | strip://*[contains(@id,'embed-photo')] | ||
24 | |||
25 | # clean blog page side bar | ||
26 | strip://*[contains(@class,'col d3 clearafter')] | ||
27 | test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt deleted file mode 100755 index 02a996f7..00000000 --- a/inc/3rdparty/site_config/standard/512pixels.net.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | test_url: http://www.512pixels.net/blog/2014/10/the-move | ||
diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt deleted file mode 100755 index 59b70a99..00000000 --- a/inc/3rdparty/site_config/standard/5by5.tv.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //*[@id="episode"] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | |||
5 | autodetect_next_page: no | ||
6 | strip_id_or_class: player | ||
7 | |||
8 | strip://*[@id="header"] | ||
9 | test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt deleted file mode 100755 index 46d09f8e..00000000 --- a/inc/3rdparty/site_config/standard/7newsbelize.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //*[@id='sstitle'] | ||
2 | body: //div[@id='sstory'] | ||
3 | strip_id_or_class: newsoptions | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.7newsbelize.com/sstory.php?nid=25654 | ||
7 | test_url: http://www.7newsbelize.com/7news.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt deleted file mode 100755 index 8bf6a4c2..00000000 --- a/inc/3rdparty/site_config/standard/944.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h2[@class='border'] | ||
2 | body: //div[@class='padding'] | ||
3 | |||
4 | convert_double_br_tags: yes | ||
5 | |||
6 | strip: //div[@id='social_sharing'] | ||
7 | strip: //div[@class='socialLinks'] | ||
8 | |||
9 | test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md deleted file mode 100755 index ab5b12d9..00000000 --- a/inc/3rdparty/site_config/standard/README.md +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | Full-Text RSS site config files | ||
2 | ================ | ||
3 | |||
4 | [Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If no rules are found, it tries to detect the content block automatically. | ||
5 | |||
6 | This repository contains the site-specific extraction rules we rely on in Full-Text RSS. | ||
7 | |||
8 | ### Contributing changes | ||
9 | |||
10 | We run automated tests on these files to detect issues. If you'd like to help keep these up to date, please look at the [test results](http://siteconfig.fivefilters.org/test/) and see which files you'd like to contribute fixes for. | ||
11 | |||
12 | We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. | ||
13 | |||
14 | You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model: | ||
15 | |||
16 | > The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination. | ||
17 | |||
18 | When we receive a pull request we'll review the changes and if everything's okay we'll update our copy. | ||
19 | |||
20 | If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github). | ||
21 | |||
22 | ### How to write a site config file | ||
23 | |||
24 | The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block. | ||
25 | |||
26 | For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns). | ||
27 | |||
28 | ### Instapaper | ||
29 | |||
30 | When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. | ||
31 | |||
32 | Marco, Instapaper's creator, graciously opened up the database of contributions to everyone: | ||
33 | |||
34 | > And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached. | ||
35 | |||
36 | Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (no longer available since Instapaper was sold). | ||
37 | |||
38 | ### Testing site config files | ||
39 | |||
40 | Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier. | ||
diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt deleted file mode 100755 index b60c15de..00000000 --- a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] | ||
3 | |||
4 | strip_id_or_class: socialshareprivacy1 | ||
5 | strip_id_or_class: zvaFacebookButton | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt deleted file mode 100755 index 013afa4c..00000000 --- a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] | ||
3 | |||
4 | strip_id_or_class: socialshareprivacy1 | ||
5 | strip_id_or_class: zvaFacebookButton | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt deleted file mode 100755 index 43aadc49..00000000 --- a/inc/3rdparty/site_config/standard/abc.es.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody'] | ||
3 | strip_id_or_class: colB | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt deleted file mode 100755 index 22b3a0f4..00000000 --- a/inc/3rdparty/site_config/standard/abc.net.au.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //div[@class='article section']//h1 | ||
2 | author: //div[@class="byline"]/a | ||
3 | date: //span[@class="timestamp"] | ||
4 | body: //div[@class="page section"] | ||
5 | |||
6 | strip: //a[@class="inline-caption"] | ||
7 | strip: //p[@class="ticker section noprint"] | ||
8 | strip: //p[@class="topics"] | ||
9 | strip: //h1 | ||
10 | strip: //div[@class="byline"] | ||
11 | strip: //p[@class="published"] | ||
12 | strip: //div[contains(@class,"featured-scroller")] | ||
13 | strip_id_or_class: footer | ||
14 | |||
15 | tidy: no | ||
16 | |||
17 | test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 | ||
18 | test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business | ||
diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt deleted file mode 100755 index 8d367351..00000000 --- a/inc/3rdparty/site_config/standard/abcnews.go.com.txt +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[@id='storyText'] | ||
3 | # for video entries | ||
4 | body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] | ||
5 | author: //div[@class='byline'] | ||
6 | date: //div[@class='date'] | ||
7 | strip: //*[@id='date_partner'] | ||
8 | |||
9 | strip: //div[@class='breadcrumb'] | ||
10 | strip: //div[contains(@class,'show_tools')] | ||
11 | strip: //div[@id='sponsoredByAd'] | ||
12 | strip: //div[contains(@class,'rel_container')] | ||
13 | strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] | ||
14 | strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] | ||
15 | strip: //p[contains(., 'Click here to return to')] | ||
16 | #strip_id_or_class: media | ||
17 | strip_id_or_class: mediaplayer | ||
18 | |||
19 | replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http | ||
20 | |||
21 | prune: no | ||
22 | |||
23 | single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') | ||
24 | |||
25 | test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 | ||
26 | # multi-page | ||
27 | test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt deleted file mode 100755 index 45d66533..00000000 --- a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='H_docTitle'] | ||
2 | |||
3 | body: //div[@id='H_meta' or @id='H_content' or @id='F_footer'] | ||
4 | |||
5 | strip_id_or_class: F_toenail | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acidcow.com.txt b/inc/3rdparty/site_config/standard/acidcow.com.txt deleted file mode 100755 index 21958651..00000000 --- a/inc/3rdparty/site_config/standard/acidcow.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[starts-with(@id, 'news-id-')] | ||
2 | |||
3 | test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acquia.com.txt b/inc/3rdparty/site_config/standard/acquia.com.txt deleted file mode 100755 index 2803611f..00000000 --- a/inc/3rdparty/site_config/standard/acquia.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title://h1[@class="title"] | ||
2 | author://div[@class="submitted"]/span/a | ||
3 | date://div[@class="submitted"]/span | ||
4 | body://div[@class="content-wrapper"] | ||
5 | |||
6 | strip://div[@id="skip-link"] | ||
7 | strip://div[@id="region-content-3-3"] | ||
8 | strip://div[@id="section-footer"] | ||
9 | test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acroswing.fr.txt b/inc/3rdparty/site_config/standard/acroswing.fr.txt deleted file mode 100755 index 6b1d67fe..00000000 --- a/inc/3rdparty/site_config/standard/acroswing.fr.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | tidy:no | ||
2 | date: //time[@class='updated'] | ||
3 | dissolve: //ul[@class='video-gallery']/li | ||
4 | dissolve: //ul[@class='video-gallery'] | ||
5 | test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/adme.ru.txt b/inc/3rdparty/site_config/standard/adme.ru.txt deleted file mode 100644 index b929685d..00000000 --- a/inc/3rdparty/site_config/standard/adme.ru.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.adme.ru%2Ftvorchestvo-hudozhniki%2Fprostoj-kak-5-kopeek-hudozhnik-557405%2F | ||
4 | |||
5 | body: //article[contains(concat(' ',normalize-space(@class),' '),' article ')] | ||
6 | test_url: http://www.adme.ru/tvorchestvo-hudozhniki/prostoj-kak-5-kopeek-hudozhnik-557405/ | ||
diff --git a/inc/3rdparty/site_config/standard/aftenposten.no.txt b/inc/3rdparty/site_config/standard/aftenposten.no.txt deleted file mode 100755 index 8a69c357..00000000 --- a/inc/3rdparty/site_config/standard/aftenposten.no.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1[@class='articleTitle '] | ||
2 | body: //div[@class='bodyText widget storyContent'] | ||
3 | strip: //p/span[@class='quote']/.. | ||
4 | strip_id_or_class: 'pull1' | ||
5 | test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aftonbladet.se.txt b/inc/3rdparty/site_config/standard/aftonbladet.se.txt deleted file mode 100755 index b6c576a8..00000000 --- a/inc/3rdparty/site_config/standard/aftonbladet.se.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | author: //article//address[contains(@class, 'author')] | ||
2 | body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')] | ||
3 | |||
4 | strip: //address//img | ||
5 | strip: //footer | ||
6 | strip_id_or_class: abSticky | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab | ||
11 | test_url: http://www.aftonbladet.se/debatt/article16207536.ab | ||
12 | test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab | ||
13 | test_url: http://www.aftonbladet.se/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt deleted file mode 100755 index b2d88a05..00000000 --- a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt deleted file mode 100755 index f6c093d2..00000000 --- a/inc/3rdparty/site_config/standard/albayan.ae.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='main-column']//div[@class='content'] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 | ||
6 | test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alex.mullr.net.txt b/inc/3rdparty/site_config/standard/alex.mullr.net.txt deleted file mode 100755 index c5f15370..00000000 --- a/inc/3rdparty/site_config/standard/alex.mullr.net.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class="entry"] | ||
2 | test_url: http://alex.mullr.net/blog/2011/05/on-spotify/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt deleted file mode 100755 index 3897f9ec..00000000 --- a/inc/3rdparty/site_config/standard/alexduner.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //section[@class='content'] | ||
2 | date: //span[1] | ||
3 | author: //h1[@id='sitetitle'] | ||
4 | test_url: http://alexduner.com/blog/something-i-learned-today | ||
diff --git a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt deleted file mode 100755 index 875405e4..00000000 --- a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //section[@class='content'] | ||
2 | date: //span[1] | ||
3 | author: //h1[@id='sitetitle'] | ||
4 | test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alistapart.com.txt b/inc/3rdparty/site_config/standard/alistapart.com.txt deleted file mode 100755 index 7a7096e2..00000000 --- a/inc/3rdparty/site_config/standard/alistapart.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: //h3[@class='byline']/a | ||
3 | date: //div[@class='ishinfo'] | ||
4 | |||
5 | body: //*[@id='articletext'] | ||
6 | strip_id_or_class: 'ishinfo' | ||
7 | strip_id_or_class: 'metastuff' | ||
8 | strip_id_or_class: 'learnmore' | ||
9 | strip_id_or_class: 'discuss' | ||
10 | |||
11 | prune: no | ||
12 | test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aljazeera.com.txt b/inc/3rdparty/site_config/standard/aljazeera.com.txt deleted file mode 100755 index d3bf4014..00000000 --- a/inc/3rdparty/site_config/standard/aljazeera.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //span[@id='DetailedTitle'] | ||
2 | body: //td[@id='tdTextContent'] | ||
3 | strip_id_or_class: Skyscrapper_Body | ||
4 | date: //span[@id='ctl00_cphBody_lblDate'] | ||
5 | author: //div[@id="dvAuthorInfo"]//a/text() | ||
6 | strip: //table[ tbody/tr/td/object ] | ||
7 | prune: no | ||
8 | test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allrecipes.com.txt b/inc/3rdparty/site_config/standard/allrecipes.com.txt deleted file mode 100755 index 85dc2a5a..00000000 --- a/inc/3rdparty/site_config/standard/allrecipes.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //h1[@id='itemTitle'] | ||
2 | body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')] | ||
3 | strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right'] | ||
4 | strip: //div[contains(@class, 'rightcoltoolsdiv')] | ||
5 | strip: //div[contains(@class, 'servings-form')] | ||
6 | strip: //p[@class='nutritional-information'] | ||
7 | strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')] | ||
8 | strip: //div[@id='nutri-info']/div[contains(@class, 'title')] | ||
9 | strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter'] | ||
10 | strip_id_or_class: eshaAttribute | ||
11 | strip_id_or_class: eshaParagraph | ||
12 | prune: no | ||
13 | |||
14 | test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allthingsd.com.txt b/inc/3rdparty/site_config/standard/allthingsd.com.txt deleted file mode 100755 index f8c67d02..00000000 --- a/inc/3rdparty/site_config/standard/allthingsd.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title://div[@class="article-title"]/h1[@class="title"] | ||
2 | date: //p[@class="article-date"] | ||
3 | body://div[contains(@class, "article-body")] | ||
4 | # Trim out related posts at bottom of article | ||
5 | strip://blockquote[@class="memo"] | ||
6 | |||
7 | tidy: no | ||
8 | |||
9 | # Yup, no idea why author won't work... | ||
10 | author://div[@class="page-header article-header clearfix"]/p[@class="title"] | ||
11 | # [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it. | ||
12 | test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ | ||
13 | test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allyou.com.txt b/inc/3rdparty/site_config/standard/allyou.com.txt deleted file mode 100755 index a13a7252..00000000 --- a/inc/3rdparty/site_config/standard/allyou.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@id='pageHdr']//h1 | ||
2 | body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint'] | ||
3 | strip: //div[contains(@class, 'infoBox') or @id='infoBox'] | ||
4 | single_page_link: //li[@id='print']/a | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt deleted file mode 100755 index da1a67bc..00000000 --- a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class = 'entry'] | ||
2 | date: substring-after(//p[@class="date"],'بتاريخ ') | ||
3 | strip_id_or_class: date | ||
4 | strip_id_or_class: follow-single | ||
5 | strip_id_or_class: ratingblock | ||
6 | strip_id_or_class: newRatingHolder | ||
7 | strip_id_or_class: postmetadata | ||
8 | strip_id_or_class: addthis_toolbox | ||
9 | strip_id_or_class: addthis_default_style | ||
10 | strip_id_or_class: size-full | ||
11 | test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alriyadh.com.txt b/inc/3rdparty/site_config/standard/alriyadh.com.txt deleted file mode 100755 index be7c43d5..00000000 --- a/inc/3rdparty/site_config/standard/alriyadh.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id = "article-view"] | ||
2 | body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')] | ||
3 | author: //p[@class = "author"] | ||
4 | strip: //h1 | ||
5 | strip: //h2 | ||
6 | strip_id_or_class: author | ||
7 | prune: no | ||
8 | test_url: http://www.alriyadh.com/2011/10/10/article674357.html | ||
9 | test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alseraj.net.txt b/inc/3rdparty/site_config/standard/alseraj.net.txt deleted file mode 100755 index 107d82d6..00000000 --- a/inc/3rdparty/site_config/standard/alseraj.net.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: //*[@id='normalfontyellow'] | ||
2 | test_url: http://www.alseraj.net/cgi-bin/pros/av/LeqaTextDisplay.cgi?display&2 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alt1040.com.txt b/inc/3rdparty/site_config/standard/alt1040.com.txt deleted file mode 100755 index 4fd45719..00000000 --- a/inc/3rdparty/site_config/standard/alt1040.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://alt1040.com/2011/09/banda-ancha-en-america-latina-insignificante \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alternet.org.txt b/inc/3rdparty/site_config/standard/alternet.org.txt deleted file mode 100755 index e92252eb..00000000 --- a/inc/3rdparty/site_config/standard/alternet.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')] | ||
2 | |||
3 | test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest | ||
4 | test_url: http://feeds.feedblitz.com/alternet \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/altfoto.com.txt b/inc/3rdparty/site_config/standard/altfoto.com.txt deleted file mode 100755 index d974cf4a..00000000 --- a/inc/3rdparty/site_config/standard/altfoto.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://altfoto.com/2011/09/nikon-presenta-su-nuevo-sistema-nikon-1-y-dos-nuevas-camaras \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt deleted file mode 100755 index a5bd03bf..00000000 --- a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ") | ||
4 | |||
5 | date: //div/a[contains (@href, "issue")] | ||
6 | |||
7 | move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1] | ||
8 | |||
9 | body: //div[@class="enableBullets"] | ||
10 | test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/amandala.com.bz.txt b/inc/3rdparty/site_config/standard/amandala.com.bz.txt deleted file mode 100755 index fb0e21b8..00000000 --- a/inc/3rdparty/site_config/standard/amandala.com.bz.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='content']//div[contains(@class, 'content')] | ||
2 | strip_id_or_class: widget | ||
3 | strip: //a[contains(@href, 'upm_export=')] | ||
4 | |||
5 | test_url: http://amandala.com.bz/news/feed/ | ||
6 | test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/amazon.com.txt b/inc/3rdparty/site_config/standard/amazon.com.txt deleted file mode 100755 index cd7ad159..00000000 --- a/inc/3rdparty/site_config/standard/amazon.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //span[@id = 'btAsinTitle'] | ||
2 | body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div | ||
3 | #strip_id_or_class: quantityDropdownDiv | ||
4 | #strip_id_or_class: addToCartSpan | ||
5 | #strip_id_or_class: oneClickDiv | ||
6 | strip_id_or_class: nocontent | ||
7 | strip_id_or_class: masDynamicConten | ||
8 | strip_id_or_class: dynamic-content | ||
9 | prune: no | ||
10 | |||
11 | find_string: <span id="actualPriceValue"> | ||
12 | replace_string: <span id="actualPriceValue"><br />Price: | ||
13 | |||
14 | strip_id_or_class: collapsePS | ||
15 | strip_id_or_class: expandPS | ||
16 | strip_id_or_class: psPlaceHolde | ||
17 | strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] | ||
18 | |||
19 | test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt deleted file mode 100755 index 7145f3ff..00000000 --- a/inc/3rdparty/site_config/standard/americandrink.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='head']/h2/a | ||
2 | author: //div[@class='head']/a | ||
3 | date: //div[@class='head']/p[@class='date']/a | ||
4 | body: //div[@class='copy'] | ||
5 | strip: //p[@class='meta'] | ||
6 | test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt deleted file mode 100755 index 31723f81..00000000 --- a/inc/3rdparty/site_config/standard/americascup.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[@class="editorial-content"]/h3 | ||
2 | body: //div[@class="hero-image" or @class="editorial-content"] | ||
3 | |||
4 | strip: //ul[@class="hero-caption"] | ||
5 | strip_id_or_class: footer | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt deleted file mode 100755 index c2b62b5a..00000000 --- a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1[@class="post-title"] | ||
2 | author: //span[@class="author"]/a | ||
3 | date: //span[@class="date"] | ||
4 | body: //div[@class="post-content main"] | ||
5 | test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt deleted file mode 100755 index 87547c63..00000000 --- a/inc/3rdparty/site_config/standard/amptoons.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //title | ||
2 | |||
3 | body: //div[@class="entry-content"] | ||
4 | |||
5 | author: //span[@class="author vcard"] | ||
6 | |||
7 | date: //span[@class="entry-date"] | ||
8 | test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt deleted file mode 100755 index fc95c5d8..00000000 --- a/inc/3rdparty/site_config/standard/anandtech.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //section[@class='main_cont']/img | //div[@class='articleContent'] | ||
2 | title: //div[@class='blog_top_left']//h2 | ||
3 | author: //a[@class='b'][1] | ||
4 | date: substring-after(substring-before(//div, 'Posted in'), ' on ') | ||
5 | strip_image_src: /content/images/globals/ | ||
6 | strip: //h2[. = 'Page 1']/preceding::p | ||
7 | strip: //h2 | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) | ||
12 | |||
13 | test_url: http://www.anandtech.com/show/8370/gigabyte-am1m-s2h-review | ||
14 | test_url: http://www.anandtech.com/show/8402/sandisk-releases-ultra-ii-ssd-the-second-tlc-nand-ssd-in-the-market | ||
15 | test_url: http://www.anandtech.com/show/8400/arms-cortex-m-even-smaller-and-lower-power-cpu-cores | ||
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt deleted file mode 100755 index 8f9b1a21..00000000 --- a/inc/3rdparty/site_config/standard/androidpolice.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='post_content'] | ||
2 | date: //div[@class='date_day'] | div[@class='date_month'] | ||
3 | |||
4 | test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ | ||
5 | |||
diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt deleted file mode 100755 index ce31fcf5..00000000 --- a/inc/3rdparty/site_config/standard/andyrutledge.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h2 | ||
2 | author: string('Andy Rutledge') | ||
3 | date: //div[@class='articledate'] | ||
4 | body: //div[@class='copybody'] | ||
5 | |||
6 | strip: //*[@class='space'] | ||
7 | strip: //*[@class='articleFoot'] | ||
8 | |||
9 | test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt deleted file mode 100755 index 2d8937f7..00000000 --- a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1[@class="title"] | ||
2 | |||
3 | author: ("Anna Manasova") | ||
4 | # is ignored, unfortunately | ||
5 | |||
6 | date: //p[@class="date"] | ||
7 | |||
8 | body: //div[@class="entry"] | ||
9 | test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt b/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt deleted file mode 100755 index 3a702e7b..00000000 --- a/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | title: substring-before(//div[@id='content']/h1, ',') | ||
6 | |||
7 | single_page_link: //a[@title='Seite drucken'] | ||
8 | |||
9 | body: //div[@id='detail-body'] | ||
10 | |||
11 | replace_string(<span class="description">): <em> | ||
12 | replace_string(<p class="leadtext"><small>): <p class="leadtext"> | ||
13 | |||
14 | # Fix headlines | ||
15 | replace_string(Patrick Hollstein): | ||
16 | replace_string(APOTHEKE ADHOC): | ||
17 | replace_string(dpa): | ||
18 | replace_string(Katharina Lübke): | ||
19 | replace_string(Julia Pradel): | ||
20 | replace_string(Franziska Gerhardt): | ||
21 | |||
22 | test_url: http://www.apotheke-adhoc.de/nachrichten/politik/nachricht-detail-politik/deutscher-apothekertag-antraege-gegen-lieferengpaesse-2/ | ||
23 | |||
diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt deleted file mode 100755 index a820bba4..00000000 --- a/inc/3rdparty/site_config/standard/applature.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'title')] | ||
2 | body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] | ||
3 | date: //div[@class='date'] | ||
4 | |||
5 | strip_id_or_class: sharethis | ||
6 | strip_id_or_class: stats | ||
7 | strip_id_or_class: apply_form | ||
8 | strip_id_or_class: job_map | ||
9 | strip_id_or_class: respond | ||
10 | strip: //h1//span[@class='type'] | ||
11 | strip: //li[@class='print' or @class='map'] | ||
12 | |||
13 | replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla | ||
14 | |||
15 | prune: no | ||
16 | tidy: no | ||
17 | |||
18 | test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/apple.com.txt b/inc/3rdparty/site_config/standard/apple.com.txt deleted file mode 100755 index a54dccc8..00000000 --- a/inc/3rdparty/site_config/standard/apple.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | strip: //p[@class='sosumi'] | ||
2 | # Aren't they witty? | ||
3 | |||
4 | # I can't work out what causes the stray character before the title. | ||
5 | title: //h1[@class='title'] | ||
6 | strip: //h1[@class='title'] | ||
7 | test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt deleted file mode 100755 index 82d6f376..00000000 --- a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[contains(@class, 'articulum')] | ||
2 | |||
3 | test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479 | ||
4 | test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/appleinsider.com.txt b/inc/3rdparty/site_config/standard/appleinsider.com.txt deleted file mode 100755 index 5ae1050b..00000000 --- a/inc/3rdparty/site_config/standard/appleinsider.com.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | title: //h1[@class="art-head"] | ||
2 | |||
3 | author: //p[contains(@class, 'byline')]/a | ||
4 | #author: //p[text() = 'By ']/a/text() | ||
5 | #strip: //p[text() = 'By '] | ||
6 | |||
7 | date: //p[contains(@class, 'date-header')] | ||
8 | |||
9 | body: //div[@class="article"] | ||
10 | strip_id_or_class: lazy | ||
11 | #strip_id_or_class: minor | ||
12 | strip_id_or_class: multipagefooter | ||
13 | strip_id_or_class: date-header | ||
14 | strip_id_or_class: byline | ||
15 | |||
16 | find_string: <noscript> | ||
17 | replace_string: <div> | ||
18 | find_string: </noscript> | ||
19 | replace_string: </div> | ||
20 | |||
21 | test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html | ||
22 | test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes | ||
23 | test_url: http://appleinsider.com/appleinsider.rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/appleweblog.com.txt b/inc/3rdparty/site_config/standard/appleweblog.com.txt deleted file mode 100755 index 023c9ccb..00000000 --- a/inc/3rdparty/site_config/standard/appleweblog.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://appleweblog.com/2011/09/encontrada-vulnerabilidad-grave-en-skype-para-ios \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/archdaily.com.txt b/inc/3rdparty/site_config/standard/archdaily.com.txt deleted file mode 100755 index 0178639e..00000000 --- a/inc/3rdparty/site_config/standard/archdaily.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | date: //div[@class='post_date'] | ||
2 | |||
3 | body: //div[@class='post_content'] | ||
4 | |||
5 | test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/archiveofourown.org.txt b/inc/3rdparty/site_config/standard/archiveofourown.org.txt deleted file mode 100755 index 579de517..00000000 --- a/inc/3rdparty/site_config/standard/archiveofourown.org.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | # Description: Fix XPaths to include ALL chapters on 'view_full_work' pages. | ||
2 | # Include: work meta, summary, chapter information, and notes which Instapaper strips out by default. | ||
3 | # Exclude: header, footer, navigation, comments. | ||
4 | # Notes: User is a newbie with XPaths. | ||
5 | |||
6 | title: //h2[@class='title'] | ||
7 | author: //h3[@class='byline'] | ||
8 | author: //a[@class='login author'] | ||
9 | |||
10 | strip_id_or_class:header | ||
11 | strip_id_or_class:navigation | ||
12 | strip_id_or_class:feedback | ||
13 | strip_id_or_class:kudos | ||
14 | strip_id_or_class:add_comment_placeholder | ||
15 | strip_id_or_class:add_comment | ||
16 | strip_id_or_class:globalize | ||
17 | strip_id_or_class:footer | ||
18 | |||
19 | single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')] | ||
20 | |||
21 | test_url: http://archiveofourown.org/works/229402?view_full_work=true | ||
22 | test_url: http://archiveofourown.org/works/750111/chapters/1399929 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt deleted file mode 100755 index eb92aa2c..00000000 --- a/inc/3rdparty/site_config/standard/arstechnica.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | author: //p[@class='byline']/a | ||
2 | body: //div[contains(@class,'article-content')] | ||
3 | strip: //h2[@class='title'] | ||
4 | strip_id_or_class: byline | ||
5 | strip_id_or_class: story-sidebar | ||
6 | prune: no | ||
7 | |||
8 | date: //div[@class='byline']/span[@class='posted']//abbr/@original-title | ||
9 | date: //div[@class='byline']/span[@class='posted']//abbr | ||
10 | |||
11 | title: //div[@id='story']//h2[@class='title'] | ||
12 | |||
13 | strip: //div[@class='pager'] | ||
14 | next_page_link: //nav//a[span/@class='next']/@href | ||
15 | |||
16 | native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] | ||
17 | |||
18 | test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars | ||
19 | test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ | ||
diff --git a/inc/3rdparty/site_config/standard/articles.boston.com.txt b/inc/3rdparty/site_config/standard/articles.boston.com.txt deleted file mode 100755 index 73bcdb4e..00000000 --- a/inc/3rdparty/site_config/standard/articles.boston.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1 | ||
2 | author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ") | ||
3 | date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"] | ||
4 | |||
5 | strip_id_or_class: mod-pagination | ||
6 | test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/articles.courant.com.txt b/inc/3rdparty/site_config/standard/articles.courant.com.txt deleted file mode 100755 index 984d81de..00000000 --- a/inc/3rdparty/site_config/standard/articles.courant.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1 | ||
2 | date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"] | ||
3 | author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3] | ||
4 | |||
5 | strip_id_or_class: mod-article-byline | ||
6 | strip_id_or_class: mod-article-header | ||
7 | strip_id_or_class: mod-article-subtitle | ||
8 | #This leaves some crud after the article, but it's better than nothing. | ||
9 | #It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element. | ||
10 | |||
11 | test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt deleted file mode 100755 index a76c2d02..00000000 --- a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[contains(@class, "article_body")] | ||
2 | # print view | ||
3 | body: //div[@id='print_facet']//div[@id='body'] | ||
4 | |||
5 | tidy: no | ||
6 | prune: no | ||
7 | |||
8 | single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html') | ||
9 | |||
10 | test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel | ||
11 | test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/asahi.com.txt b/inc/3rdparty/site_config/standard/asahi.com.txt deleted file mode 100755 index b4eec7bd..00000000 --- a/inc/3rdparty/site_config/standard/asahi.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='HeadLine'] | ||
2 | strip: //div[@id='utility_right'] | ||
3 | test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ascarter.net.txt b/inc/3rdparty/site_config/standard/ascarter.net.txt deleted file mode 100755 index 0327e846..00000000 --- a/inc/3rdparty/site_config/standard/ascarter.net.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1[@class='article_title'] | ||
2 | author: //span[@class='author'] | ||
3 | date: //h2[@class='dateline'] | ||
4 | body: //div[@class='article_body'] | ||
5 | test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/astronews.com.txt b/inc/3rdparty/site_config/standard/astronews.com.txt deleted file mode 100755 index 8de22270..00000000 --- a/inc/3rdparty/site_config/standard/astronews.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //span[@class='titel'] | ||
2 | author: //span[@class='metadaten_C']/a//span[@class='metadaten_C'] | ||
3 | date: substring-after(//span[@class='metadaten_C'],'astronews.com') | ||
4 | strip: //span[@class='bu'] | ||
5 | strip_image_src: '/_images/' | ||
6 | |||
7 | test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/asymco.com.txt b/inc/3rdparty/site_config/standard/asymco.com.txt deleted file mode 100755 index f639b048..00000000 --- a/inc/3rdparty/site_config/standard/asymco.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # Johannes Stühler | ||
2 | |||
3 | title://h2 | ||
4 | author://span[@class='meta-content'] | ||
5 | date://abbr[@class='date published']/@title | ||
6 | body://div[@class='entry-content'] | ||
7 | |||
8 | test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/autoblog.com.txt b/inc/3rdparty/site_config/standard/autoblog.com.txt deleted file mode 100755 index 291db992..00000000 --- a/inc/3rdparty/site_config/standard/autoblog.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | prune: no | ||
2 | body: //div[@class='post-body'] | ||
3 | author: //p[@class='byline']//a | ||
4 | date: substring-after(//div[@class='about']/p[2], 'Posted') | ||
5 | strip: //div[@class='body']/div[@class='meta'] | ||
6 | test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/autocar.co.uk.txt b/inc/3rdparty/site_config/standard/autocar.co.uk.txt deleted file mode 100755 index 9f4fe18b..00000000 --- a/inc/3rdparty/site_config/standard/autocar.co.uk.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //div[@class='col-center']/h1 | ||
2 | author: //div[@class='personality']/a | ||
3 | date: //div[@class='personality-date'] | ||
4 | body: //div[@class='content-top ']//div[@class='content'][1] | //div[contains(@class,'article-body')] | //div[contains(@class,'main-article')] | ||
5 | |||
6 | next_page_link: //div[@id='review-link']/a | ||
7 | |||
8 | strip: //div[@class='author-block'] | ||
9 | strip: //p//iframe[contains(@src,'signup')]/preceding::p[1] | ||
10 | |||
11 | test_url: http://www.autocar.co.uk/car-review/volkswagen/golf | ||
12 | test_url: http://www.autocar.co.uk/car-news/pebble-beach/saleen-unveils-performance-electric-vehicle-based-tesla-model-s | ||
13 | test_url: http://www.autocar.co.uk/car-review/rolls-royce/first-drives/rolls-royce-ghost-series-ii-first-drive-review | ||
diff --git a/inc/3rdparty/site_config/standard/avclub.com.txt b/inc/3rdparty/site_config/standard/avclub.com.txt deleted file mode 100755 index c365a7aa..00000000 --- a/inc/3rdparty/site_config/standard/avclub.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //*[@id="article_wrapper"]/div[1]/a[1] | ||
2 | body: //*[@id="article_wrapper"]/div[2] | ||
3 | date: //*[@id="article_wrapper"]/div[1]/text()[2] | ||
4 | test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/baltimoresun.com.txt b/inc/3rdparty/site_config/standard/baltimoresun.com.txt deleted file mode 100755 index 35b62427..00000000 --- a/inc/3rdparty/site_config/standard/baltimoresun.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | single_page_link: //div[@class='toppaginate']//a[@rel='nofollow'] | ||
2 | convert_double_br_tags: yes | ||
3 | |||
4 | title: //div[@class="story"]/h1 | ||
5 | body: //div[@id="story-body-text"] | ||
6 | author: //span[@class="byline"] | ||
7 | date: //p[@class="date"] | ||
8 | |||
9 | strip: //*[@class='all'] | ||
10 | strip: //*[@class='articlerail'] | ||
11 | |||
12 | test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt deleted file mode 100755 index 1207b343..00000000 --- a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: //p[@class="author"]/a[1] | ||
3 | body: //div[@class="article"] | ||
4 | date: //p[@class="date"] | ||
5 | |||
6 | # remove user tools | ||
7 | strip: //div[@class='tools'] | ||
8 | strip: //h1 | ||
9 | strip: //h2[@class='subtitle'] | ||
10 | strip: //p[@class='author'] | ||
11 | strip: //p[@class='date'] | ||
12 | |||
13 | test_url: http://www.baseballprospectus.com/article.php?articleid=18463 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/basicthinking.de.txt b/inc/3rdparty/site_config/standard/basicthinking.de.txt deleted file mode 100755 index f08c1f26..00000000 --- a/inc/3rdparty/site_config/standard/basicthinking.de.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h2 | ||
2 | date: //span[@class='date'] | ||
3 | body: //div[@class='entry'] | ||
4 | |||
5 | strip: //div[@class='zusatz'] | ||
6 | |||
7 | test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bb.is.txt b/inc/3rdparty/site_config/standard/bb.is.txt deleted file mode 100755 index 57f7fdfa..00000000 --- a/inc/3rdparty/site_config/standard/bb.is.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20) | ||
2 | |||
3 | |||
4 | date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12) | ||
5 | |||
6 | |||
7 | body: //div[@class='first-article-big'] | ||
8 | strip: //table[@class='newsimagecontainer'] | ||
9 | strip: //h3[@class='headlines'] | ||
10 | strip: //iframe[@class='headlines'] | ||
11 | strip: //a[@class='newslink'] | ||
12 | convert_double_br_tags: yes | ||
13 | test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt deleted file mode 100755 index bad77654..00000000 --- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | body: //div[@class="story-body"] | ||
2 | # for video entries | ||
3 | body: //div[contains(@class, "videoInStory") or @id="meta-information"] | ||
4 | title: //h1[@class="story-header"] | ||
5 | date: //span[@class="story-date"]/span[@class='date'] | ||
6 | # for sport site | ||
7 | date: //meta[@name='DCTERMS.created']/@content | ||
8 | author: //div[@id='headline']//span[@class='byline-name'] | ||
9 | |||
10 | # recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055 | ||
11 | body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] | ||
12 | |||
13 | #strip: //div[@class="story-feature narrow"] | ||
14 | #strip: //div[@class="story-feature wide"] | ||
15 | #strip: //div[@class="story-feature dslideshow-enclosure"] | ||
16 | strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))] | ||
17 | strip: //span[@class="story-date"] | ||
18 | #strip: //div[@class="caption body-narrow-width"] | ||
19 | strip: //div[@class="warning"]//p | ||
20 | strip: //div[@id='page-bookmark-links-head'] | ||
21 | strip: //object | ||
22 | strip: //div[contains(@class, "bbccom_advert_placeholder")] | ||
23 | strip: //div[contains(@class, "embedded-hyper")] | ||
24 | strip: //div[contains(@class, 'market-data')] | ||
25 | strip: //a[contains(@class, 'hidden')] | ||
26 | strip: //div[contains(@class, 'hypertabs')] | ||
27 | strip: //div[contains(@class, 'related')] | ||
28 | strip: //form[@id='comment-form'] | ||
29 | strip: //div[contains(@class, 'comment-introduction')] | ||
30 | strip: //div[contains(@class, 'share-tools')] | ||
31 | strip: //div[@id='also-related-links'] | ||
32 | |||
33 | strip_id_or_class: share-help | ||
34 | strip_id_or_class: comments_module | ||
35 | |||
36 | replace_string(<noscript>): <div> | ||
37 | replace_string(</noscript>): </div> | ||
38 | |||
39 | tidy: no | ||
40 | prune: no | ||
41 | |||
42 | dissolve: //h2 | ||
43 | |||
44 | test_url: http://www.bbc.co.uk/sport/0/football/23224017 | ||
45 | test_contains: Swansea City have completed the club-record signing | ||
46 | |||
47 | test_url: http://www.bbc.co.uk/news/business-15060862 | ||
48 | test_contains: Europe's leaders are meeting again to try to solve | ||
49 | |||
50 | # news feed | ||
51 | test_url: http://feeds.bbci.co.uk/news/rss.xml | ||
52 | # sports feed | ||
53 | test_url: http://feeds.bbci.co.uk/sport/0/football/rss.xml?edition=int | ||
54 | # video entry | ||
55 | test_url: http://www.bbc.co.uk/news/world-asia-22056933 | ||
diff --git a/inc/3rdparty/site_config/standard/bbc.com.txt b/inc/3rdparty/site_config/standard/bbc.com.txt deleted file mode 100755 index c04a683e..00000000 --- a/inc/3rdparty/site_config/standard/bbc.com.txt +++ /dev/null | |||
@@ -1,60 +0,0 @@ | |||
1 | body: //div[@class="story-body"] | ||
2 | # for video entries | ||
3 | body: //div[contains(@class, "videoInStory") or @id="meta-information"] | ||
4 | title: //h1[@class="story-header"] | ||
5 | date: //span[@class="story-date"]/span[@class='date'] | ||
6 | # for sport site | ||
7 | date: //meta[@name='DCTERMS.created']/@content | ||
8 | author: //div[@id='headline']//span[@class='byline-name'] | ||
9 | |||
10 | # recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055 | ||
11 | body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] | ||
12 | |||
13 | #strip: //div[@class="story-feature narrow"] | ||
14 | #strip: //div[@class="story-feature wide"] | ||
15 | #strip: //div[@class="story-feature dslideshow-enclosure"] | ||
16 | strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))] | ||
17 | strip: //span[@class="story-date"] | ||
18 | #strip: //div[@class="caption body-narrow-width"] | ||
19 | strip: //div[@class="warning"]//p | ||
20 | strip: //div[@id='page-bookmark-links-head'] | ||
21 | strip: //object | ||
22 | strip: //div[contains(@class, "bbccom_advert_placeholder")] | ||
23 | strip: //div[contains(@class, "embedded-hyper")] | ||
24 | strip: //div[contains(@class, 'market-data')] | ||
25 | strip: //a[contains(@class, 'hidden')] | ||
26 | strip: //div[contains(@class, 'hypertabs')] | ||
27 | strip: //div[contains(@class, 'related')] | ||
28 | strip: //form[@id='comment-form'] | ||
29 | strip: //div[contains(@class, 'comment-introduction')] | ||
30 | strip: //div[contains(@class, 'share-tools')] | ||
31 | strip: //div[@id='also-related-links'] | ||
32 | |||
33 | strip_id_or_class: share-help | ||
34 | strip_id_or_class: comments_module | ||
35 | |||
36 | replace_string(<noscript>): <div> | ||
37 | replace_string(</noscript>): </div> | ||
38 | |||
39 | native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] | ||
40 | |||
41 | tidy: no | ||
42 | prune: no | ||
43 | |||
44 | dissolve: //h2 | ||
45 | |||
46 | test_url: http://www.bbc.com/sport/0/football/28918021 | ||
47 | test_contains: Cameroonian footballer Albert Ebosse has died | ||
48 | |||
49 | test_url: http://www.bbc.com/sport/0/football/23224017 | ||
50 | |||
51 | test_url: http://www.bbc.com/news/business-15060862 | ||
52 | test_contains: Europe's leaders are meeting again to try | ||
53 | |||
54 | |||
55 | # news feed | ||
56 | test_url: http://feeds.bbci.co.uk/news/rss.xml | ||
57 | # sports feed | ||
58 | test_url: http://feeds.bbci.co.uk/sport/0/football/rss.xml?edition=int | ||
59 | # video entry | ||
60 | test_url: http://www.bbc.com/news/world-asia-22056933 | ||
diff --git a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt deleted file mode 100755 index 1547d625..00000000 --- a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //header//h1 | ||
2 | #body: //article[contains(@class, 'node-full')] | ||
3 | body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method'] | ||
4 | |||
5 | strip_id_or_class: recipe-rating-wrapper | ||
6 | strip_id_or_class: magazine-subcribe-header | ||
7 | strip_id_or_class: hide | ||
8 | strip_id_or_class: recipe-actions | ||
9 | strip_id_or_class: buy-ingredients | ||
10 | strip_id_or_class: related-content | ||
11 | strip_id_or_class: recipe-magazine-ad | ||
12 | strip_id_or_class: copy-right | ||
13 | |||
14 | prune: no | ||
15 | |||
16 | test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt deleted file mode 100755 index 72c1baed..00000000 --- a/inc/3rdparty/site_config/standard/benoitmaison.org.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | body: //div[@class="entry-content"] | ||
2 | |||
3 | # Remove text ‘Tweet’ | ||
4 | strip: //div[@class="entry-content"]/div[last()] | ||
5 | |||
6 | title: //h1[@class="entry-title"] | ||
7 | |||
8 | # If the Instapaper text parser worked with HTML5 tags, we would use: | ||
9 | date: //time[@class="entry-date"] | ||
10 | |||
11 | # But since it does not, use this more complicated rule: | ||
12 | date: //div[@class="entry-meta"]/a[@rel="bookmark"] | ||
13 | |||
14 | # Unfortunately, the following rule is overridden by the automatically found author. | ||
15 | author: ("Benoit Maison") | ||
16 | test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt deleted file mode 100755 index 9f8c41c6..00000000 --- a/inc/3rdparty/site_config/standard/berlingske.dk.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[contains(@class, 'article-wrapper')] | ||
3 | test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bernama.com.txt b/inc/3rdparty/site_config/standard/bernama.com.txt deleted file mode 100755 index fdc04b7f..00000000 --- a/inc/3rdparty/site_config/standard/bernama.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, "NewsText")] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.bernama.com/bernama/v7/rss/english.php | ||
5 | test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt deleted file mode 100755 index 7815cf26..00000000 --- a/inc/3rdparty/site_config/standard/betabeat.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class="entry-content"] | ||
2 | test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt deleted file mode 100755 index 90a54a23..00000000 --- a/inc/3rdparty/site_config/standard/betanews.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # some articles at this site, like this one, don't | ||
2 | # seem to pick up the article body via normal | ||
3 | # processing, other articles come through fine | ||
4 | # http://www.betanews.com/joewilcox/article | ||
5 | # /Google-is-a-marketing-sensation/1309708375 | ||
6 | body: //*[@id="article"] | ||
7 | test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt deleted file mode 100755 index e431037a..00000000 --- a/inc/3rdparty/site_config/standard/biography.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[contains(@class, 'main-content')]//h1 | ||
2 | body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')] | ||
7 | |||
8 | test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bit-tech.net.txt b/inc/3rdparty/site_config/standard/bit-tech.net.txt deleted file mode 100755 index c6f5b204..00000000 --- a/inc/3rdparty/site_config/standard/bit-tech.net.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | body: //div[@id='column_1'] | ||
2 | next_page_link: //div[@class='next']/a[not(contains(@href, '/comments') or contains(@href, '/news/'))] | ||
3 | prune: no | ||
4 | |||
5 | author: substring-after(//p[@class='byline'], 'by ') | ||
6 | date: substring-before(substring-after(//p[@class='byline'], 'on '), ' by') | ||
7 | |||
8 | strip: //h1 | ||
9 | strip_id_or_class: socialLinks | ||
10 | strip_id_or_class: byline | ||
11 | strip_id_or_class: pageSelector | ||
12 | strip_id_or_class: articleTabs | ||
13 | strip_id_or_class: pageNav | ||
14 | strip_id_or_class: share | ||
15 | strip_id_or_class: commentsContainer | ||
16 | strip_id_or_class: below_article_related | ||
17 | |||
18 | test_url: http://www.bit-tech.net/hardware/storage/2014/08/13/ocz-arc-100-240gb-review/1 | ||
19 | test_url: http://www.bit-tech.net/news/bits/2014/08/15/google-trojan/1 | ||
diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt deleted file mode 100755 index 7bffae93..00000000 --- a/inc/3rdparty/site_config/standard/bitelia.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://bitelia.com/2011/09/klout-midiendo-influencia \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bizjournals.com.txt b/inc/3rdparty/site_config/standard/bizjournals.com.txt deleted file mode 100755 index cfba766f..00000000 --- a/inc/3rdparty/site_config/standard/bizjournals.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | date: //meta[@name='publish-date']/@content | ||
2 | body: //div[contains(@class, 'articleContentWrapper')] | ||
3 | prune: no | ||
4 | |||
5 | strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')] | ||
6 | |||
7 | strip_id_or_class: related_content | ||
8 | strip_id_or_class: enlarge | ||
9 | strip_id_or_class: photoBy | ||
10 | strip_id_or_class: older | ||
11 | |||
12 | test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html | ||
13 | test_url: http://feeds.bizjournals.com/industry_20?format=xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt deleted file mode 100755 index 0fed5526..00000000 --- a/inc/3rdparty/site_config/standard/bjango.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@class='articlehead'] | ||
2 | body: //div[@class='column'] | ||
3 | strip: //h1 | ||
4 | strip: //div[@class='help'] | ||
5 | |||
6 | #no author or date/time provided in current layout | ||
7 | test_url: http://bjango.com/articles/actions/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bleacherreport.com.txt b/inc/3rdparty/site_config/standard/bleacherreport.com.txt deleted file mode 100755 index 9205e44e..00000000 --- a/inc/3rdparty/site_config/standard/bleacherreport.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | body: //div[contains(@class, 'article_pages')] | ||
2 | |||
3 | strip_id_or_class: article_page-header | ||
4 | strip_id_or_class: paginator | ||
5 | strip_id_or_class: article_info | ||
6 | |||
7 | find_string: src="data:image | ||
8 | replace_string: ignore-src="data:image | ||
9 | find_string: data-defer-src=" | ||
10 | replace_string: src=" | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | test_url: http://bleacherreport.com/articles/feed | ||
15 | test_url: http://bleacherreport.com/articles/2137787-christian-ponders-newborn-daughter-was-named-after-fsu-legend-bobby-bowden | ||
16 | test_url: http://bleacherreport.com/articles/2137596-college-football-week-1-picks-unlv-runnin-rebels-vs-arizona-wildcats/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt deleted file mode 100755 index 7ac8cc11..00000000 --- a/inc/3rdparty/site_config/standard/blog.arsln.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article/header/h6/time | ||
4 | title: //article/header/h3 | ||
5 | author: //meta[@name='author']/@content | ||
6 | body: //article//post | ||
7 | |||
8 | test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt deleted file mode 100755 index 78d7f516..00000000 --- a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //title | ||
2 | author: //span[@class='author vcard']/a | ||
3 | date: //p[@class='headline_meta']/abbr[@class='published'] | ||
4 | body: //div[@class='format_text entry-content'] | ||
5 | |||
6 | strip: //div[@id='dd_ajax_float'] | ||
7 | test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt deleted file mode 100755 index db80a35f..00000000 --- a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # Instapaper gets this back to front and only gets the blog title instead of the article title. | ||
2 | title: substring-before(//title, '-') | ||
3 | |||
4 | author: //a[ contains(@href, '/people') ] | ||
5 | |||
6 | body: //div[ @class='post' ] | ||
7 | |||
8 | # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. | ||
9 | test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt deleted file mode 100755 index 97e48e69..00000000 --- a/inc/3rdparty/site_config/standard/blog.fefe.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h2 | ||
2 | date: //h3 | ||
3 | body: //ul | ||
4 | |||
5 | test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt deleted file mode 100755 index 13d1d44a..00000000 --- a/inc/3rdparty/site_config/standard/blog.instagram.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # clean Instagram blog a little bit | ||
2 | |||
3 | tidy:no | ||
4 | prune:no | ||
5 | |||
6 | body://div[contains(@id,'content')] | ||
7 | |||
8 | strip_id_or_class:meta | ||
9 | strip_id_or_class:notes | ||
10 | strip_id_or_class:pagination | ||
11 | test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt deleted file mode 100755 index fda01b15..00000000 --- a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | author: //a[@href="http://www.marco.org/about"] | ||
2 | date: //span[@class="date"] | ||
3 | |||
4 | # Remove the date from article body. | ||
5 | strip: //span[@class="date"] | ||
6 | |||
7 | # Remove pagination links from article body. | ||
8 | strip: //div[@id="pagination"] | ||
9 | test_url: http://blog.instapaper.com/post/31303984531 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt deleted file mode 100755 index e89ad3a5..00000000 --- a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | date: //span[contains(@class, 'date-links')] | ||
2 | author: //span[contains(@class, 'author-links')] | ||
3 | body: //div[contains(@class, 'entry-content')] | ||
4 | test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt deleted file mode 100755 index bcd3bdc9..00000000 --- a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //*[contains(@class, 'post_content')] | ||
2 | author: string('Kaelig Deloumeau-Prigent') | ||
3 | title: //h1[@class='title'] | ||
4 | date: //span[@class='date'] | ||
5 | test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt deleted file mode 100755 index 73c30c47..00000000 --- a/inc/3rdparty/site_config/standard/blog.naver.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //span[@class='pcol1 itemSubjectBoldfont'] | ||
2 | body: //div[@id='postListBody'] | ||
3 | date: //p[@class='date fil5 pcol2'] | ||
4 | single_page_link: /html/frameset/frame[1]/attribute::src | ||
5 | strip: //div[@class='post-btn'] | ||
6 | test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt deleted file mode 100755 index de81beba..00000000 --- a/inc/3rdparty/site_config/standard/blog.pchome.net.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # PCHOME blog, a popular Chinese blog host | ||
2 | # Oct 15, 2011 | ||
3 | # | ||
4 | |||
5 | title://*[contains(@class,'imp')]/h2 | ||
6 | |||
7 | date://*[contains(@class,'imp')]/span | ||
8 | body://div[contains(@id,'blog_content')] | ||
9 | |||
10 | |||
11 | |||
12 | test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt deleted file mode 100755 index 40f0c560..00000000 --- a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //a[@class="blog_title"] | ||
2 | date: //p[@class="when"]/a | ||
3 | body: //div[@class="blog_entry"] | ||
4 | strip_id_or_class:blog_title | ||
5 | strip_id_or_class:when | ||
6 | test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.renren.com.txt b/inc/3rdparty/site_config/standard/blog.renren.com.txt deleted file mode 100755 index 401d31e5..00000000 --- a/inc/3rdparty/site_config/standard/blog.renren.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://blog.renren.com/share/224959024/14260739544 | ||
3 | # http://blog.renren.com/share/231323504/14261768898 | ||
4 | # http://blog.renren.com/share/230305019/1502806705 | ||
5 | |||
6 | title://h1[contains(@class, 'title-article')] | ||
7 | author://span[contains(@class, 'name')] | ||
8 | body://div[contains(@class, 'content-body')] | ||
9 | |||
10 | convert_double_br_tags:yes | ||
11 | test_url: http://blog.renren.com/share/230305019/1502806705 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt deleted file mode 100755 index 4895272a..00000000 --- a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | # Sina blog, the most popular blog host in China. | ||
2 | # Its source code is horrible. | ||
3 | # | ||
4 | # Issue: | ||
5 | # Only the first image in the article is displayed. | ||
6 | # The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host. | ||
7 | # | ||
8 | |||
9 | title://*[contains(@class,'titName SG_txta')] | ||
10 | author://*[contains(@id,'ownernick')] | ||
11 | date://*[contains(@class,'time SG_txtc')] | ||
12 | body://div[contains(@class,'articalContent')] | ||
13 | |||
14 | # Remove redundant spans whose class starts with "MASS" | ||
15 | # Example <span class="MASSf21674ffeef7"></span> | ||
16 | strip://span[contains(@class,'MASS')] | ||
17 | |||
18 | # Remove comment | ||
19 | strip://div[contains(@class,'allComm')] | ||
20 | |||
21 | # Remove hidden text and links | ||
22 | strip://ins | ||
23 | |||
24 | tidy:no | ||
25 | convert_double_br_tags:yes | ||
26 | test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt deleted file mode 100755 index 68bd4e39..00000000 --- a/inc/3rdparty/site_config/standard/blog.spu.edu.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body://div[@class='post'] | ||
2 | test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt deleted file mode 100755 index eae6982b..00000000 --- a/inc/3rdparty/site_config/standard/blog.wells.ee.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2/a[@class="no-link title"] | ||
2 | author: //h2[@id="blog_owner"] | ||
3 | date: //time | ||
4 | strip: //h2/a[@class="no-link title"] | ||
5 | test_url: http://blog.wells.ee/retina | ||
6 | test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt deleted file mode 100755 index 2a66952b..00000000 --- a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # 2011-08-23 [carlo@...] Initial version. | ||
2 | |||
3 | author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text() | ||
4 | |||
5 | # why yes, I do feel a bit dirty | ||
6 | date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " ) | ||
7 | |||
8 | test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.faz.net.txt b/inc/3rdparty/site_config/standard/blogs.faz.net.txt deleted file mode 100755 index 4f2626f1..00000000 --- a/inc/3rdparty/site_config/standard/blogs.faz.net.txt +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | # Set author | ||
7 | author: //a[@rel='author'] | ||
8 | |||
9 | # Set date | ||
10 | date: //span[@class='Datum'] | ||
11 | |||
12 | # Content is here | ||
13 | body: //div[@class='Artikel'] | ||
14 | |||
15 | # Tidy up before article | ||
16 | strip: //div[@id='FAZHeaderNeu'] | ||
17 | strip: //h2[@itemprop='headline'] | ||
18 | strip: //span[@class='Datum'] | ||
19 | strip: //span[@class='Autor'] | ||
20 | strip_id_or_class: ArticlePagerTop | ||
21 | strip: //div[@class='FAZArtikelEinleitung']/h2 | ||
22 | |||
23 | # General cleanup | ||
24 | strip: //div[@class='clear'] | ||
25 | strip: //span[@class='Bildnachweis'] | ||
26 | strip: //iframe | ||
27 | strip_id_or_class: Community | ||
28 | strip: ' · ' | ||
29 | |||
30 | # Remove tracking and ads | ||
31 | strip_image_src: /l.gif? | ||
32 | strip: //img[@width='1'] | ||
33 | strip_id_or_class: invisible | ||
34 | strip_id_or_class: Anzeige | ||
35 | strip_id_or_class: billboard | ||
36 | |||
37 | # Remove clutter after article | ||
38 | strip_id_or_class: Tagline | ||
39 | strip_id_or_class: ArtikelAbbinder | ||
40 | strip_id_or_class: FAZArtikelKommentare | ||
41 | strip_id_or_class: ArtikelKommentieren | ||
42 | strip_id_or_class: FAZContentRight | ||
43 | |||
44 | # Try it yourself | ||
45 | test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/ | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt deleted file mode 100755 index 86580d21..00000000 --- a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt deleted file mode 100755 index d47c3520..00000000 --- a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //div[@id='pageFeature']/h1 | ||
2 | body: //div[@id='articleBody'] | ||
3 | strip: //div[@class='module wide'] | ||
4 | test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt deleted file mode 100755 index b2ff8332..00000000 --- a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h3[@class="post-name"] | ||
2 | author: //span[@class="user-name"] | ||
3 | date: //div[@class="post-date"] | ||
4 | body: //div[@class="post-content user-defined-markup"] | ||
5 | footnotes: no | ||
6 | test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt deleted file mode 100755 index d3eb9966..00000000 --- a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //div[@id='single']/h1 | ||
2 | body: //div[@id='postcontent'] | ||
3 | test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt deleted file mode 100755 index 2102015d..00000000 --- a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | # meta data | ||
2 | title://h1[@class = 'postTitle'] | ||
3 | author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') | ||
4 | date://span[@class = 'datestamp'] | ||
5 | |||
6 | #body content | ||
7 | body://div[@id = 'singleBlogPost'] | ||
8 | |||
9 | #reclaim author info | ||
10 | move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] | ||
11 | strip://p[@class = 'moreLink mobileHide'] | ||
12 | |||
13 | #cleanup comments, there might be some open <div> sections | ||
14 | strip://div[@id = 'comments2'] | ||
15 | strip://h3[a[@href = '#add-comment']] | ||
16 | test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt deleted file mode 100755 index 1bc65e77..00000000 --- a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | # metadata | ||
2 | author://div[@class = 'post']/div[@class='meta']/a[1] | ||
3 | date://div[@id = 'rap']/h2[1] | ||
4 | body://div[@class = 'post'] | ||
5 | |||
6 | # wrapping caption and image | ||
7 | wrap_in(fieldset)://div[contains(@class, 'wp-caption')] | ||
8 | |||
9 | |||
10 | # clean up | ||
11 | strip://div[@class = 'post']/h3[@class = 'storytitle'] | ||
12 | strip://div[@class = 'post']/div[@class = 'social'] | ||
13 | strip://img[@style = 'display:none;'] | ||
14 | strip://img[@height='0' and @width='0'] | ||
15 | test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt deleted file mode 100755 index 3d0fbadc..00000000 --- a/inc/3rdparty/site_config/standard/blogs.technet.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h3[@class="post-name"] | ||
2 | author: //span[@class="user-name"] | ||
3 | date: //div[@class="post-date"] | ||
4 | body: //div[@class="post-content user-defined-markup"] | ||
5 | strip_id_or_class: log-feedback-list | ||
6 | tidy: no | ||
7 | footnotes: no | ||
8 | test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx | ||
9 | test_url: http://blogs.technet.com/b/isablog/archive/2009/01/07/a-pptp-client-might-fail-to-connect-to-a-vpn-server-on-the-internet-through-an-isa-server-2006.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt deleted file mode 100755 index 543d3920..00000000 --- a/inc/3rdparty/site_config/standard/bluetouff.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body://div[@class='entry'] | ||
2 | date://div[@class='meta'] | ||
3 | strip://a[@class='FlattrButton'] | ||
4 | test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt deleted file mode 100755 index 3b3da991..00000000 --- a/inc/3rdparty/site_config/standard/boagworld.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"][2] | ||
2 | author: string("Paul Boag") | ||
3 | date: substring(//span[@class="meta"], 11) | ||
4 | body: //article | ||
5 | strip: //h2 | ||
6 | strip: //h1 | ||
7 | strip: //div[@id="callsToAction"] | ||
8 | test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt deleted file mode 100755 index 4f39661b..00000000 --- a/inc/3rdparty/site_config/standard/boingboing.net.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # This is far from perfect, but so is BoingBoing's markup | ||
2 | title: //h2[@class="headline"] | ||
3 | single_page_link: //h2[@class="headline"]/a | ||
4 | #date: //p[@class="byline"] | ||
5 | body: //div[@class="post"] | ||
6 | |||
7 | strip_id_or_class: shareMe | ||
8 | strip_id_or_class: authorbox | ||
9 | strip_id_or_class: byline | ||
10 | |||
11 | test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt deleted file mode 100755 index 3616bbf2..00000000 --- a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h2[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt deleted file mode 100755 index fe2d2cbf..00000000 --- a/inc/3rdparty/site_config/standard/book.douban.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //span[@property='v:description'] | ||
2 | date: //span[@property='v:dtreviewed'] | ||
3 | author: //span[@property='v:reviewer'] | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://book.douban.com/review/2422662/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt deleted file mode 100755 index 03b60039..00000000 --- a/inc/3rdparty/site_config/standard/bookforum.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | #metadata | ||
2 | title://div[@class = 'Topper']/h1 | ||
3 | author://div[@class = 'Topper']/h3 | ||
4 | date://div[@class = 'Topper']/h6 | ||
5 | body://div[@class = 'Core'] | ||
6 | |||
7 | |||
8 | |||
9 | # clean up | ||
10 | strip://div[@class = 'Topper']/h1 | ||
11 | strip://div[@class = 'Topper']/h3 | ||
12 | strip://div[@class = 'Topper']/h4 | ||
13 | strip://div[@class = 'Topper']/h5 | ||
14 | strip://div[@class = 'Topper']/h6 | ||
15 | strip://br[@clear = 'all'] | ||
16 | strip://div[@class = 'adCore'] | ||
17 | strip://div[@class = 'BookR'] | ||
18 | strip://div[@class = 'InfoBox'] | ||
19 | test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt deleted file mode 100755 index b4e116fe..00000000 --- a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title://h1 | ||
2 | author://div[@class="meta"]/span/a | ||
3 | date://div[@class="date"] | ||
4 | body://div[@class="content article"] | ||
5 | strip://div[@class="content article"]/h1 | ||
6 | |||
7 | test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt deleted file mode 100755 index 4c74a34e..00000000 --- a/inc/3rdparty/site_config/standard/bostonglobe.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | # NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com. | ||
2 | |||
3 | title: //div[@class="header"]/h1 | ||
4 | author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ") | ||
5 | date: //div[@class="byline"]/p[last()] | ||
6 | body: //div[@class="article-body"] | ||
7 | |||
8 | strip_id_or_class: aside | ||
9 | strip_id_or_class: promo | ||
10 | strip_id_or_class: skip-nav | ||
11 | strip_id_or_class: article-more | ||
12 | strip_id_or_class: article-bar | ||
13 | |||
14 | # This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed. | ||
15 | strip_id_or_class: figure | ||
16 | test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt deleted file mode 100755 index 64e04a1c..00000000 --- a/inc/3rdparty/site_config/standard/bostonreview.net.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | #basics | ||
2 | title://h3[@class = 'article_title'] | ||
3 | date://span[@class = 'article_date'] | ||
4 | body://div[@id = 'center_column_article'] | ||
5 | #correct, but author not being picked up in preview | ||
6 | author://span[@class = 'article_author'] | ||
7 | |||
8 | #strips basics from article | ||
9 | strip_id_or_class:article_title | ||
10 | strip_id_or_class:article_date | ||
11 | strip_id_or_class:article_author | ||
12 | |||
13 | #strips pull quotes | ||
14 | strip_id_or_class:pull_quote | ||
15 | test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt deleted file mode 100755 index a836e1e2..00000000 --- a/inc/3rdparty/site_config/standard/boundlessline.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: substring-before(//title, '|') | ||
2 | body: //div[@class="entry"] | ||
3 | # Remove the author's picture | ||
4 | strip: //div[@class="entry"]/a[1] | ||
5 | test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt deleted file mode 100755 index 932143d1..00000000 --- a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //*[@class='articletitle'] | ||
2 | body: //*[(@id='articlebody')] | ||
3 | date: //*[(@class='articledate')] | ||
4 | author: //*[(@class='articleauthor')] | ||
5 | autodetect_next_page: no | ||
6 | test_url: http://bowdoinorient.com/article/8045 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt deleted file mode 100755 index 9705f621..00000000 --- a/inc/3rdparty/site_config/standard/brainfacts.org.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[@class="standard"]/h1 | ||
2 | author: string("BrainFacts.org") | ||
3 | date: //div[@class="meta"]/strong | ||
4 | |||
5 | strip: //p[@class="skip"] | ||
6 | strip: //div[@class="meta"] | ||
7 | strip: //div[@class="standard"]/h1 | ||
8 | strip: //div[@class="modal"] | ||
9 | strip: //div[@class="columnRight"] | ||
10 | test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt deleted file mode 100755 index 36aa2efa..00000000 --- a/inc/3rdparty/site_config/standard/brandeins.de.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # set body | ||
2 | body: //div[@id='theContent'] | ||
3 | |||
4 | # set title | ||
5 | title: //div[@id='theContent']/h3 | ||
6 | strip: //div[@id='theContent']/h3 | ||
7 | test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt deleted file mode 100755 index fc020539..00000000 --- a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | date://h2[@class="date-header"] | ||
2 | body://div[@class="entry-content"] | ||
3 | test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt deleted file mode 100755 index 6a22dcb7..00000000 --- a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | title: //meta[@name='DC.title']/@content | ||
2 | title: //div[contains(@class, 'cabecera_noticia')]//h1 | ||
3 | date: //meta[@name='DC.date']/@content | ||
4 | date: //meta[@name='date']/@content | ||
5 | body: //div[@class='columna_texto'] | ||
6 | body: //div[@id='cuerpo_noticia'] | ||
7 | body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | strip_id_or_class: disposicion_vertical | ||
12 | strip_id_or_class: ampliar_foto | ||
13 | strip_id_or_class: utilidades | ||
14 | strip_id_or_class: info_relacionada | ||
15 | strip_id_or_class: m-kiosko | ||
16 | strip_id_or_class: info_complementa | ||
17 | |||
18 | strip: //p[@class='nota_pie'] | ||
19 | strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] | ||
20 | strip: //div[@id='coment' or @id='foros_not'] | ||
21 | |||
22 | test_url: http://brasil.elpais.com/brasil/2014/10/15/politica/1413334841_878730.html | ||
23 | test_contains: O PT quer intensificar a presença do ex-presidente | ||
24 | |||
25 | test_url: http://brasil.elpais.com/brasil/2014/10/13/internacional/1413225730_450761.html | ||
26 | test_contains: Todos na localidade onde ele nasceu ainda falavam da façanha | ||
diff --git a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt deleted file mode 100755 index 55da1787..00000000 --- a/inc/3rdparty/site_config/standard/brettterpstra.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='post full'] | ||
2 | title: //h1 | ||
3 | author: substring-after(//title, '- ') | ||
4 | date: //span[@class='date'] | ||
5 | test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt deleted file mode 100755 index 27e6b70c..00000000 --- a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='articleBody'] | ||
2 | test_url: http://www.brisbanetimes.com.au/opinion/blogs/blunt-instrument/losing-our-minds--for-24-hours-20120118-1q682.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt deleted file mode 100755 index 17a47605..00000000 --- a/inc/3rdparty/site_config/standard/brookings.edu.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //div[@id='contentheader']/h1 | ||
2 | author: //p[@class='attribution']/span[@class='author']/* | ||
3 | # Is there a way to pull multiple authors? My XPath here is just grabbing the first | ||
4 | |||
5 | date: /html/head/meta[@name="date"]/@content | ||
6 | body: //div[@class='main-content'] | ||
7 | |||
8 | strip: //p[@class='byline'] | ||
9 | strip: //div[@class='img-gallery'] | ||
10 | strip: //div[@class='callout'] | ||
11 | strip: //div[@class='add-your-view'] | ||
12 | convert_double_br_tags: yes | ||
13 | test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt deleted file mode 100755 index d33d7d4e..00000000 --- a/inc/3rdparty/site_config/standard/brooksreview.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class='article'] | ||
3 | body: //div[@class='post'] | ||
4 | date: //*[@id='single']/span | ||
5 | prune: no | ||
6 | test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt deleted file mode 100755 index 200c2e4e..00000000 --- a/inc/3rdparty/site_config/standard/bt.no.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1[contains(@class,'articleTitle')] | ||
2 | author: //span[@itemprop='name'] | ||
3 | date: //time[@class='published'] | ||
4 | body: //div[contains(@class,'bodyText')] | ||
5 | |||
6 | strip_id_or_class: 'pull1' | ||
7 | strip_id_or_class: 'relationArticle' | ||
8 | strip: //span[@class='quote'] | ||
9 | |||
10 | # strip h2 if at end of article (typically a request for comments) | ||
11 | strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2 | ||
12 | test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/buffed.de.txt b/inc/3rdparty/site_config/standard/buffed.de.txt deleted file mode 100755 index 3dd36ce6..00000000 --- a/inc/3rdparty/site_config/standard/buffed.de.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | date: //meta[@itemprop='datePublished']/@content | ||
2 | body: //div[@class='intro' or contains(@class, 'article_text')] | ||
3 | prune: no | ||
4 | strip_id_or_class: embedcode | ||
5 | strip_id_or_class: EmbedSwitch | ||
6 | strip_id_or_class: EmbedText | ||
7 | strip_id_or_class: bildergalerie | ||
8 | strip_id_or_class: subline_seohour_image | ||
9 | strip_id_or_class: ova-player | ||
10 | strip_id_or_class: jcarouseloutput | ||
11 | strip_id_or_class: cbox_embedded | ||
12 | |||
13 | test_url: http://www.buffed.de/SWTOR-Star-Wars-The-Old-Republic-PC-218697/News/SWTOR-Ab-Patch-24-Lore-Klamotten-faerben-1090051/ | ||
14 | test_url: http://www.buffed.de/feed.cfm?menu_alias=home \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt deleted file mode 100755 index f0fd08db..00000000 --- a/inc/3rdparty/site_config/standard/buquad.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //h2/a | ||
3 | date: substring-after(//h2, '|') | ||
4 | strip_id_or_class: 'attachment' | ||
5 | strip: //h3 | ||
6 | |||
7 | body: //div[@class='entry'] | ||
8 | test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/business2community.com.txt b/inc/3rdparty/site_config/standard/business2community.com.txt deleted file mode 100755 index 0dcc7ff8..00000000 --- a/inc/3rdparty/site_config/standard/business2community.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | date: substring-after(//p[@class='byline'],'Published') | ||
2 | |||
3 | strip: //div[@class='article-meta'] | ||
4 | |||
5 | test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340 | ||
diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt deleted file mode 100755 index 39eb7426..00000000 --- a/inc/3rdparty/site_config/standard/businessinsider.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title://div[@class="sl-layout-post"]/h1 | ||
2 | body: //div[contains(@class, 'post-content') or contains(@class, 'slide-module') or contains(@class, 'KonaBody')] | ||
3 | strip: //div[contains(@class, "post-sidebar")] | ||
4 | strip: //div[@id='related-links'] | ||
5 | strip: //div[@class='related-links-container'] | ||
6 | strip: //p[@class='source'] | ||
7 | author://div[@class="byline"]/a | ||
8 | date://div[@class="byline"]/span[@class="date"] | ||
9 | prune: no | ||
10 | |||
11 | single_page_link: //a[contains(text(), 'View as one page')] | ||
12 | |||
13 | strip://*[contains(@class,'sponsored-text')] | ||
14 | strip: //div[@id='post_footer'] | ||
15 | |||
16 | test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 | ||
diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt deleted file mode 100755 index 6502b8e1..00000000 --- a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@id='article_detail'] | ||
2 | title: //meta[@property='og:title']/@content | ||
3 | date: //div[@id='date_com_art']//a[@class='date'] | ||
4 | author: //div[@id='article_detail']//font[@class='auteur'] | ||
5 | |||
6 | strip_id_or_class: porte_titre_theme | ||
7 | strip_id_or_class: cont_param | ||
8 | strip_id_or_class: date_com_art | ||
9 | |||
10 | prune: no | ||
11 | |||
12 | test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt deleted file mode 100755 index f546b708..00000000 --- a/inc/3rdparty/site_config/standard/businessweek.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | # include the lead graphic in the body, if available | ||
2 | body: //div[contains(concat(' ', normalize-space(@id), ' '), ' lead_graphic ')] | //div[contains(concat(' ', normalize-space(@itemprop), ' '), ' articleBody ')] | ||
3 | title: //h1[contains(concat(' ', normalize-space(@itemprop), ' '), ' headline ')] | ||
4 | date: //time[contains(concat(' ', normalize-space(@itemprop), ' '), ' datePublished ')] | ||
5 | |||
6 | strip_id_or_class: photo_credit | ||
7 | strip_id_or_class: photo_caption | ||
8 | strip_id_or_class: inline_gallery | ||
9 | # pull quote, often inside a blockquote element | ||
10 | strip_id_or_class: pq | ||
11 | strip_id_or_class: credit | ||
12 | strip_id_or_class: figcaption | ||
13 | strip_id_or_class: related_item | ||
14 | |||
15 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | ||
16 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall | ||
17 | test_url: http://www.businessweek.com/articles/2014-07-09/american-apparel-dov-charneys-sleazy-struggle-for-control | ||
diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt deleted file mode 100755 index ea88ea47..00000000 --- a/inc/3rdparty/site_config/standard/buzzfeed.com.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | # Creator: Greg Leuch <greg@...> | ||
2 | |||
3 | # It can be messy. | ||
4 | tidy:no | ||
5 | |||
6 | # The basic template. | ||
7 | title: //h1[@data-print='title'] | ||
8 | author: //a[@data-print='author'] | ||
9 | date: //time[@data-print='date'] | ||
10 | body: //div[@data-print='body'] | ||
11 | body: //section[@data-print='body'] | ||
12 | |||
13 | find_string: rel:bf_image_src= | ||
14 | replace_string: src= | ||
15 | find_string: src="data: | ||
16 | replace_string: disabled_src="data: | ||
17 | |||
18 | native_ad_clue: //meta[@property="article:section" and @content="Advertiser"] | ||
19 | |||
20 | # For various things... | ||
21 | strip: //*[@data-print="ignore"] | ||
22 | test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays | ||
23 | # Native ad | ||
24 | test_url: http://www.buzzfeed.com/bravo/ways-to-up-your-online-dating-game \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt deleted file mode 100755 index 63c82130..00000000 --- a/inc/3rdparty/site_config/standard/bygonebureau.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //a[contains(@href, '/author/')] | ||
3 | date: //*[@class='post-date'] | ||
4 | strip: //*[@class='post-date'] | ||
5 | strip: //h1 | ||
6 | test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cable.co.uk.txt b/inc/3rdparty/site_config/standard/cable.co.uk.txt deleted file mode 100755 index 435bf3b5..00000000 --- a/inc/3rdparty/site_config/standard/cable.co.uk.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@class='page-content']//h1 | ||
2 | body: //div[@class='page-content'] | ||
3 | strip_id_or_class: editorial-bar-top | ||
4 | strip_id_or_class: social-bottom | ||
5 | strip_id_or_class: comment-form | ||
6 | strip_id_or_class: pc-why | ||
7 | |||
8 | prune: no | ||
9 | tidy: no | ||
10 | |||
11 | test_url: http://www.cable.co.uk/news/bt-vision-unveils-interactive-guide-application-800734218/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/canonrumors.com.txt b/inc/3rdparty/site_config/standard/canonrumors.com.txt deleted file mode 100755 index c22cf4f1..00000000 --- a/inc/3rdparty/site_config/standard/canonrumors.com.txt +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | # Set title | ||
7 | title: //h2 | ||
8 | |||
9 | date: //li[@class='time'] | ||
10 | |||
11 | # Set author | ||
12 | author: //a[contains(@rel, 'author')] | ||
13 | |||
14 | # Content is here | ||
15 | body: //div[@id='content'] | ||
16 | |||
17 | # Tidy up before article | ||
18 | strip: //div[@class='meta'] | ||
19 | |||
20 | # Tidy up after article | ||
21 | strip_id_or_class: nr_related_placeholder | ||
22 | strip_id_or_class: twitter-share-button | ||
23 | strip_id_or_class: afterpost | ||
24 | strip_id_or_class: tags | ||
25 | |||
26 | # Try it yourself | ||
27 | test_url: http://www.canonrumors.com/2014/09/chuck-westfall-talks-canon-eos-7d-mark-ii/ | ||
28 | test_url: http://www.canonrumors.com/2014/09/canon-cinema-eos-captures-space-in-4k-for-new-imax-3d-film/ | ||
diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt deleted file mode 100755 index 49f34302..00000000 --- a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class='producttabbed-title'] | ||
2 | body: //div[@class='postTabs_divs postTabs_curr_div'] | ||
3 | strip: //div[@class='ratingblock2'] | ||
4 | strip: //p[@id='breadcrumbs'] | ||
5 | strip: //div[@style='display: none'] | ||
6 | |||
7 | |||
8 | test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt deleted file mode 100755 index 5ea302e0..00000000 --- a/inc/3rdparty/site_config/standard/carpeaqua.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class='entry'] | ||
3 | |||
4 | prune: no | ||
5 | # otherwise the footnotes are removed | ||
6 | test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cars.com.txt b/inc/3rdparty/site_config/standard/cars.com.txt deleted file mode 100755 index 71c5c050..00000000 --- a/inc/3rdparty/site_config/standard/cars.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[contains(@class, 'basicInfo')]//h1 | ||
2 | |||
3 | body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt deleted file mode 100755 index 2cd197fb..00000000 --- a/inc/3rdparty/site_config/standard/catb.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@class='article'] | ||
2 | strip: //div[@class='revhistory'] | ||
3 | strip: //div[@class='toc'] | ||
4 | tidy: no | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt deleted file mode 100755 index ba5faf3f..00000000 --- a/inc/3rdparty/site_config/standard/cbc.ca.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[contains(@class, 'headline')]/h1 | ||
2 | author: //h5[contains(@class, 'byline')] | ||
3 | date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ') | ||
4 | body: //div[@id="storyboard"] | ||
5 | test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cbn.com.txt b/inc/3rdparty/site_config/standard/cbn.com.txt deleted file mode 100755 index de8d8839..00000000 --- a/inc/3rdparty/site_config/standard/cbn.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[contains(@class, 'articleText')] | ||
2 | date: //div[contains(@class, 'articleDate')] | ||
3 | author: //a[contains(@id, 'articleDetails_lnkByLine')] | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.cbn.com/cbnnews/world/2013/June/Chilly-G-8-Obama-Putin-Agree-to-Disagree-on-Syria/ | ||
7 | test_url: http://www.cbn.com/cbnnews/world/2013/June/UK-Agency-Accused-of-Hacking-Foreign-Diplomats/ | ||
8 | test_url: http://www.cbn.com/cbnnews/feed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt deleted file mode 100755 index 04d20230..00000000 --- a/inc/3rdparty/site_config/standard/cbsnews.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | date: //meta[@name="published"]/@content | ||
2 | date: //div[@class="timeLine"] | ||
3 | title: //div[@id='contentBody']//h1 | ||
4 | author: //dl[@class="storyBlogByline"]/dd/a | ||
5 | body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] | ||
6 | |||
7 | # Content Pruning | ||
8 | strip: //div[@class="scrollingArrows"] | ||
9 | strip: //div[@class="timeLine"] | ||
10 | strip: //dl[@class="storyBlogByline"] | ||
11 | strip: //span[@class='image-credit'] | ||
12 | |||
13 | prune: no | ||
14 | |||
15 | test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ | ||
diff --git a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt deleted file mode 100755 index 42faa521..00000000 --- a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='frame']//img[@class='horizontal'] | //div[@class='content'] | ||
2 | test_url: http://cedarrepublican.com/online_features/gift_ideas/sending-mother-s-day-flowers-how-to-be-sure-they/article_b69af9b8-1f05-5352-8621-16ce007e5623.html | ||
diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt deleted file mode 100755 index de34a7d8..00000000 --- a/inc/3rdparty/site_config/standard/chareidi.org.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: //h1 | ||
2 | test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt deleted file mode 100755 index d00d65de..00000000 --- a/inc/3rdparty/site_config/standard/chinamining.org.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //*[@id='Content']/span[1] | ||
2 | author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(') | ||
3 | date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter') | ||
4 | |||
5 | strip: //*[@id='Content']/span[1] | ||
6 | strip: //*[@id='Content']/span[2] | ||
7 | |||
8 | body: //*[@id='Content'] | ||
9 | |||
10 | test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt deleted file mode 100755 index 2645f119..00000000 --- a/inc/3rdparty/site_config/standard/chomsky.info.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='title'] | ||
2 | author: //div[@class='author'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.chomsky.info/onchomsky/2002----.htm | ||
6 | test_contains: The propaganda model argues | ||
diff --git a/inc/3rdparty/site_config/standard/chrisltd.com.txt b/inc/3rdparty/site_config/standard/chrisltd.com.txt deleted file mode 100755 index 86d0f5db..00000000 --- a/inc/3rdparty/site_config/standard/chrisltd.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //header/h1/b[contains(@class, 'title')] | ||
2 | author: substring-after(//article/header/div, 'By ') | ||
3 | date: //header/h1/span[contains(@class, 'date')] | ||
4 | body: //div[@id='main']/article | ||
5 | strip: //header | ||
6 | test_url: http://chrisltd.com/blog/2012/03/fix-widows-indesign/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt deleted file mode 100755 index 86be14ce..00000000 --- a/inc/3rdparty/site_config/standard/christianitytoday.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title://div[@class='title'] | ||
2 | author://div[@class='byline']/b | ||
3 | date:substring-after(//div[@class='byline'], 'posted') | ||
4 | body://div[@id='body'] | ||
5 | wrap_in(h2)://span[@class='subhead'] | ||
6 | wrap_in(i)://p[@class='bio'] | ||
7 | wrap_in(i)://p[@class='copyright'] | ||
8 | strip://div[@class='title'] | ||
9 | strip://div[@class='deck'] | ||
10 | strip://div[@class='byline'] | ||
11 | strip://div[@class='copyright'] | ||
12 | strip://br | ||
13 | test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt deleted file mode 100755 index fb5f342d..00000000 --- a/inc/3rdparty/site_config/standard/christianpf.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //*[@class="author vcard fn"] | ||
3 | date: //*[@class="published"] | ||
4 | body: //div[(@class = "dd_content_wrap")] | ||
5 | test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt deleted file mode 100755 index b3c76519..00000000 --- a/inc/3rdparty/site_config/standard/christies.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article//time[@pubdate] | ||
4 | title: //article/header/h2 | ||
5 | body: //article | ||
6 | test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt deleted file mode 100755 index 5a1d043d..00000000 --- a/inc/3rdparty/site_config/standard/chrome.google.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //pre[@id='cx-desc-text'] | ||
2 | body: //div[contains(@class, 'overview-tab-right-bar-info')] | ||
3 | title: //h1[contains(@class, 'detail-dialog-title')] | ||
4 | tidy: no | ||
5 | prune: no | ||
6 | replace_string(<noscript>): <div> | ||
7 | replace_string(</noscript>): </div> | ||
8 | |||
9 | test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chronicle.com.txt b/inc/3rdparty/site_config/standard/chronicle.com.txt deleted file mode 100755 index e86d3eca..00000000 --- a/inc/3rdparty/site_config/standard/chronicle.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //h1[contains(@class, "entry-title")] | ||
2 | author: //p[contains(@class, "byline")] | ||
3 | |||
4 | # blog articles (chronicle.com/blogs/*) | ||
5 | body: //div[contains(@class, "abstract")] | ||
6 | date: //p[contains(@class, "time")] | ||
7 | |||
8 | # all (?) other articles | ||
9 | body: //div[@id="article-body"] | ||
10 | date: //p[contains(@class, "dateline")] | ||
11 | |||
12 | # remove sidebars containing images (I assume this is desired for Instapaper) | ||
13 | strip: //div[@id="related"] | ||
14 | strip: //div[contains(@class, "image")] | ||
15 | |||
16 | # Note: if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. Thank goodness for that bookmarklet. | ||
17 | test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ciaosamin.com.txt b/inc/3rdparty/site_config/standard/ciaosamin.com.txt deleted file mode 100755 index 02fd3434..00000000 --- a/inc/3rdparty/site_config/standard/ciaosamin.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body://div[contains(@class, 'entry-content')] | ||
2 | date://h2[contains(@class, 'date-header')] | ||
3 | title://h3[contains(@class, 'post-title')] | ||
4 | test_url: http://www.ciaosamin.com/2013/04/how-this-happened.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt deleted file mode 100755 index b8913639..00000000 --- a/inc/3rdparty/site_config/standard/cicero.de.txt +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | # fforst@... | ||
2 | |||
3 | # Use link to print article for single page view | ||
4 | single_page_link: //a[@class="print"] | ||
5 | |||
6 | # set body | ||
7 | tidy: no | ||
8 | body: //div[@class='artikel-content'] | ||
9 | |||
10 | # strip title and subtitle since we got it already | ||
11 | strip: //div[@class='issue'] | ||
12 | strip: //div[@class='artikel-content']/h2 | ||
13 | |||
14 | # some authors are known and have a link, others don't | ||
15 | author: //a[contains(@href, 'autor?')] | ||
16 | |||
17 | #date | ||
18 | date: //span[@class='article-date'] | ||
19 | |||
20 | # Strip author since we got him | ||
21 | strip_id_or_class: author | ||
22 | |||
23 | #strip captions | ||
24 | strip_id_or_class: field-name-field-image-credit | ||
25 | strip_id_or_class: field-name-field-article-image-subtitle | ||
26 | |||
27 | # remove community functions | ||
28 | strip: //div[@class='meta'] | ||
29 | strip: //div[@id='comments'] | ||
30 | |||
31 | # remove "continue on the next page" text | ||
32 | strip: //p[text()="[SEITE]"] | ||
33 | test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ciperchile.cl.txt b/inc/3rdparty/site_config/standard/ciperchile.cl.txt deleted file mode 100755 index d7e9b762..00000000 --- a/inc/3rdparty/site_config/standard/ciperchile.cl.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //*[(@id = "articlebody")] | ||
2 | strip_id_or_class: rotulo | ||
3 | |||
4 | test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cjr.org.txt b/inc/3rdparty/site_config/standard/cjr.org.txt deleted file mode 100755 index df4c7cc4..00000000 --- a/inc/3rdparty/site_config/standard/cjr.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] | ||
2 | prune: no | ||
3 | |||
4 | single_page_link: //li[@class='print']/a | ||
5 | |||
6 | test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/classyllama.com.txt b/inc/3rdparty/site_config/standard/classyllama.com.txt deleted file mode 100755 index 1864eee8..00000000 --- a/inc/3rdparty/site_config/standard/classyllama.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | date: //div[@id='content']//p[contains(@class, 'date')]/span | ||
2 | author: substring-after(//div[@id='content']//div[contains(@class, 'over-under-bars')]/p[last()]/text(), 'Posted by ') | ||
3 | body: //div[@id='content']//div[@class='pane-content'] | ||
4 | strip_id_or_class: trackback-url | ||
5 | strip_id_or_class: over-under-bars | ||
6 | test_url: http://www.classyllama.com/content/layout-caching \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/clientk.com.txt b/inc/3rdparty/site_config/standard/clientk.com.txt deleted file mode 100755 index d5a22ccb..00000000 --- a/inc/3rdparty/site_config/standard/clientk.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title://div[@class="entrytitle"]/a | ||
2 | author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ") | ||
3 | date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted") | ||
4 | body://div[@class="entrybody"] | ||
5 | strip://div[@class="entrybody"]//p[@class="singleinfo"] | ||
6 | test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/clubic.com.txt b/inc/3rdparty/site_config/standard/clubic.com.txt deleted file mode 100755 index 0148e54c..00000000 --- a/inc/3rdparty/site_config/standard/clubic.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //a[@class='auteur'] | ||
3 | body: //div[@class='editorial'] | ||
4 | next_page_link: //a[contains(text(),'Page suivante')] | ||
5 | strip: //a[contains(text(),'Page suivante')] | ||
6 | strip: //a[contains(text(),'Page précédente')] | ||
7 | strip_id_or_class: slideshow | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cmswire.com.txt b/inc/3rdparty/site_config/standard/cmswire.com.txt deleted file mode 100755 index 0b76377a..00000000 --- a/inc/3rdparty/site_config/standard/cmswire.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[contains(@id,'article-body')] | ||
2 | strip://div[contains(@id,'disqus_count_block')] | ||
3 | strip://div[contains(@id,'col-left')] | ||
4 | strip://div[contains(@id,'col-right')] | ||
5 | |||
6 | test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cn.engadget.com.txt b/inc/3rdparty/site_config/standard/cn.engadget.com.txt deleted file mode 100755 index 63f6f7ea..00000000 --- a/inc/3rdparty/site_config/standard/cn.engadget.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h2[@class="posttitle"] | ||
2 | body: //div[@class="postbody"] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/ | ||
diff --git a/inc/3rdparty/site_config/standard/cn.reuters.com.txt b/inc/3rdparty/site_config/standard/cn.reuters.com.txt deleted file mode 100755 index 28f10472..00000000 --- a/inc/3rdparty/site_config/standard/cn.reuters.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='maincontent']//h1 | ||
2 | body: //div[@id='resizeableText'] | ||
3 | |||
4 | single_page_link: concat(//link[@rel='canonical']/@href, '?sp=true') | ||
5 | |||
6 | test_url: http://cn.reuters.com/article/CNAnalysesNews/idCNKBS0FF0NM20140710 | ||
7 | test_url: http://cn.reuters.feedsportal.com/CNAnalysesNews | ||
8 | # multipage link | ||
9 | test_url: http://cn.reuters.com/article/idCNKBS0FF0UL20140710 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnet.com.txt b/inc/3rdparty/site_config/standard/cnet.com.txt deleted file mode 100755 index eac08aaa..00000000 --- a/inc/3rdparty/site_config/standard/cnet.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[contains(@class, 'postBody')] | ||
3 | date: //div[@id='nameAndTime']/time | ||
4 | author: //div[@id='nameAndTime']/span[@class='author'] | ||
5 | |||
6 | strip_id_or_class: image-credit | ||
7 | strip_id_or_class: noAutolink | ||
8 | strip_id_or_class: related | ||
9 | |||
10 | prune: no | ||
11 | tidy: no | ||
12 | |||
13 | # early end | ||
14 | replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> | ||
15 | |||
16 | test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt deleted file mode 100755 index 6f69e4e8..00000000 --- a/inc/3rdparty/site_config/standard/cnn.com.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] | ||
2 | title: //div[@class="cnn_storyarea"]/h1 | ||
3 | author: //div[@class="cnnByline"]/strong | ||
4 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun') | ||
5 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon') | ||
6 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue') | ||
7 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed') | ||
8 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu') | ||
9 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri') | ||
10 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat') | ||
11 | strip: //div[@class="cnn_storyarea"]/h1 | ||
12 | strip_id_or_class: cnnByline | ||
13 | strip_id_or_class: cnn_strytmstmp | ||
14 | strip_id_or_class: cnn_strycaptiontxt | ||
15 | strip_id_or_class: cnn_strybtntoolsbttm | ||
16 | strip_id_or_class: cnn_strybtntools | ||
17 | strip_id_or_class: cnn_strybtmcntnt | ||
18 | strip_id_or_class: sharebar | ||
19 | #strip_id_or_class: cnn_containerwht | ||
20 | strip_id_or_class: cnn_stryathrtmp | ||
21 | replace_string(<a name="em0"></a>): <!-- a name --> | ||
22 | test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories | ||
23 | test_url: http://rss.cnn.com/rss/edition.rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnnsi.com.txt b/inc/3rdparty/site_config/standard/cnnsi.com.txt deleted file mode 100755 index ac49aef9..00000000 --- a/inc/3rdparty/site_config/standard/cnnsi.com.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | # main sportsillustrated.com articles | ||
2 | |||
3 | body: //div[@id="cnnStoryContent"] | ||
4 | title: //div[@id="cnnStoryHeadline"]//h1 | ||
5 | author: //div[@id="cnnSubBanner"]//strong | ||
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | ||
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | ||
8 | |||
9 | # kill ugly font buttons | ||
10 | strip: //div[@id="cnnSCFontButtons"] | ||
11 | |||
12 | # kill misc filler videos & etc | ||
13 | strip: //div[@class="cnnDivideContent"] | ||
14 | strip: //*[@class="cnnTMbox"] | ||
15 | |||
16 | # si vault articles | ||
17 | # ------------- | ||
18 | body: //div[@class="siv_artPara"] | ||
19 | title: //div[@class="siv_artHeader"]//h1 | ||
20 | author: //div[@class="byline"] | ||
21 | date: //div[@class="date"] | ||
22 | |||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | ||
24 | strip_id_or_class: cnnstorypagination | ||
25 | |||
26 | test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/code.activestate.com.txt b/inc/3rdparty/site_config/standard/code.activestate.com.txt deleted file mode 100755 index 83a21e19..00000000 --- a/inc/3rdparty/site_config/standard/code.activestate.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | title: //div[@id='page_header']/h1 | ||
3 | |||
4 | strip_id_or_class: 'lineno' | ||
5 | strip_id_or_class: 'block-toolbar-button' | ||
6 | strip_id_or_class: 'recipe_score' | ||
7 | strip: //div[@id='recipe_tools'] | ||
8 | strip: //div[@id='addcomment'] | ||
9 | |||
10 | test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt deleted file mode 100755 index f8a88cae..00000000 --- a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='readme'] | ||
2 | |||
3 | test_url: http://code.fivefilters.org/full-text-rss | ||
diff --git a/inc/3rdparty/site_config/standard/code.google.com.txt b/inc/3rdparty/site_config/standard/code.google.com.txt deleted file mode 100755 index 6e9c00a7..00000000 --- a/inc/3rdparty/site_config/standard/code.google.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@id="gc-pagecontent"] | ||
2 | strip: //a[@class="backtotop"] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/codeproject.com.txt b/inc/3rdparty/site_config/standard/codeproject.com.txt deleted file mode 100755 index d1191acc..00000000 --- a/inc/3rdparty/site_config/standard/codeproject.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id="contentdiv"] | ||
2 | date: //span[@class="date"] | ||
3 | test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/codinghorror.com.txt b/inc/3rdparty/site_config/standard/codinghorror.com.txt deleted file mode 100755 index adf6e5a0..00000000 --- a/inc/3rdparty/site_config/standard/codinghorror.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //div[@class='blogbody'] | ||
2 | strip: //h3[@class='title'] | ||
3 | date: //h2[@class='date'] | ||
4 | #Should Atwood just be a literal? | ||
5 | author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V') | ||
6 | |||
7 | # tim.kingman@... 2011-07-26 | ||
8 | # Prune:no to retain all-link ULs that are part of the body content like | ||
9 | # http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html | ||
10 | # Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed. | ||
11 | |||
12 | prune: no | ||
13 | strip: //div[@class='posted']/following-sibling::* | ||
14 | strip: //div[@class='posted'] | ||
15 | test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/collegehumor.com.txt b/inc/3rdparty/site_config/standard/collegehumor.com.txt deleted file mode 100755 index 318e6ff4..00000000 --- a/inc/3rdparty/site_config/standard/collegehumor.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: //p[@class='byline']/a[1] | ||
3 | date: //*[@class='date'] | ||
4 | |||
5 | body: //div[@class='article_body'] | ||
6 | strip: //p[@class='ca_intro'] | ||
7 | strip: //div[@id='action_bar'] | ||
8 | strip: //div[@class='below_content'] | ||
9 | strip: //div[@id='announcement'] | ||
10 | strip: //div[@id='leftovers'] | ||
11 | strip: //div[@class='form'] | ||
12 | strip: //div[@id='email_overlay'] | ||
13 | strip: //a[@class='close'] | ||
14 | test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt deleted file mode 100755 index 800a907d..00000000 --- a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class="entry-body"] | ||
2 | test_url: http://communities-dominate.blogs.com/brands/2012/03/brutal-truth-about-lumia-cannot-sustain-even-1-to-1-replacement-of-symbian-windows-phone-strategy-do.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/community.service-now.com.txt b/inc/3rdparty/site_config/standard/community.service-now.com.txt deleted file mode 100755 index c9854b43..00000000 --- a/inc/3rdparty/site_config/standard/community.service-now.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[@id="center"]//div[@class="node"] | ||
2 | title: //div[@id="center"]//h2 | ||
3 | author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") | ||
4 | date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") | ||
5 | strip: //div[@id="center"]//h2[1] | ||
6 | strip: //span[@class="submitted"][1] | ||
7 | move_into(//div[@class="node"])://div[@class="breadcrumb"] | ||
8 | test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computer.org.txt b/inc/3rdparty/site_config/standard/computer.org.txt deleted file mode 100755 index 8345cf50..00000000 --- a/inc/3rdparty/site_config/standard/computer.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip_id_or_class:column-3 | ||
2 | strip_id_or_class:portlet-boundary | ||
3 | strip_id_or_class:banner | ||
4 | |||
5 | test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt deleted file mode 100755 index 5973c50b..00000000 --- a/inc/3rdparty/site_config/standard/computerbase.de.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title://h1 | ||
2 | |||
3 | author://div[@id="news-meta"]/a | ||
4 | |||
5 | body://*[@id="main"]/div[1] | ||
6 | |||
7 | strip://*[@id="main"]/div[2] | ||
8 | strip://*[@id="main"]/div[3] | ||
9 | strip://*[@id="page"]//footer | ||
10 | |||
11 | #date: didn't manage to parse it | ||
12 | |||
13 | #Images have to be stripped because the page does it with overlay | ||
14 | strip://img | ||
15 | |||
16 | #figures are not displayed in instapaper... | ||
17 | strip://figure | //figcaption | ||
18 | test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerworld.com.txt b/inc/3rdparty/site_config/standard/computerworld.com.txt deleted file mode 100755 index 7f20a4da..00000000 --- a/inc/3rdparty/site_config/standard/computerworld.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title: //meta[@name='headline']/@content | ||
2 | date: //meta[@name='date']/@content | ||
3 | author: //meta[@name='author']/@content | ||
4 | body: //div[contains(@class, 'article')] | ||
5 | body://div[@id="article_body"] | ||
6 | |||
7 | strip_id_or_class: banner | ||
8 | strip: //noscript | ||
9 | strip: //div[@style='width:1px;height:130px;float:right;'] | ||
10 | strip: //div[@class='storyby'] | ||
11 | strip_image_src: twitter_icon | ||
12 | strip_image_src: rss_bug | ||
13 | |||
14 | tidy: no | ||
15 | prune: no | ||
16 | |||
17 | next_page_link://div[@id="next_page"]/a | ||
18 | |||
19 | single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/')) | ||
20 | |||
21 | test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware | ||
22 | test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerworld.dk.txt b/inc/3rdparty/site_config/standard/computerworld.dk.txt deleted file mode 100755 index d819109c..00000000 --- a/inc/3rdparty/site_config/standard/computerworld.dk.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip: //div[contains(@class, 'articleAdtechAd')] | ||
2 | title: //div[@id='article']/h1 | ||
3 | title: //div[contains(@class, 'article')]/h1 | ||
4 | body: //div[@id='articleText'] | ||
5 | test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/contemporist.com.txt b/inc/3rdparty/site_config/standard/contemporist.com.txt deleted file mode 100755 index c3120fe8..00000000 --- a/inc/3rdparty/site_config/standard/contemporist.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # get author from string like "Posted by <author> on <date>" | ||
2 | author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on') | ||
3 | |||
4 | # get date from string like "Posted by <author> on <date>" | ||
5 | date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on') | ||
6 | |||
7 | # this keeps thumbnail images | ||
8 | prune: no | ||
9 | test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt deleted file mode 100755 index 966cc861..00000000 --- a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='article_header']/h1 | ||
2 | body: //div[@class='article_header']/p | //div[@class='article_body'] | ||
3 | strip_id_or_class: share_this | ||
4 | strip_id_or_class: sociable | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt deleted file mode 100755 index a4244097..00000000 --- a/inc/3rdparty/site_config/standard/cooper.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //*[contains(@class,'body')] | ||
2 | date: //abbr[@class='published'] | ||
3 | |||
4 | test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/core77.com.txt b/inc/3rdparty/site_config/standard/core77.com.txt deleted file mode 100755 index cf1fa93c..00000000 --- a/inc/3rdparty/site_config/standard/core77.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id="permalink"]/div[@class="post"] | ||
2 | |||
3 | strip: //div[@id='backArrow'] | ||
4 | strip: //div[@id='fwdArrow'] | ||
5 | strip: //div[@class="post-title"] | ||
6 | strip: //div[@class="sharing"] | ||
7 | test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/counterpunch.org.txt b/inc/3rdparty/site_config/standard/counterpunch.org.txt deleted file mode 100755 index b6bd8be5..00000000 --- a/inc/3rdparty/site_config/standard/counterpunch.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='main']//h1[contains(@class, 'article-title')] | ||
2 | author: //div[@class='mainauthorstyle'] | ||
3 | body: //div[@class='main']//div[@class='main-text'] | ||
4 | strip: //td[@width='140'] | ||
5 | |||
6 | test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crazybutable.com.txt b/inc/3rdparty/site_config/standard/crazybutable.com.txt deleted file mode 100755 index 037cd177..00000000 --- a/inc/3rdparty/site_config/standard/crazybutable.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title://h2 | ||
2 | body://div[contains(@class, 'entrytext')] | ||
3 | test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crimemagazine.com.txt b/inc/3rdparty/site_config/standard/crimemagazine.com.txt deleted file mode 100755 index 9cf0bccc..00000000 --- a/inc/3rdparty/site_config/standard/crimemagazine.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | autodetect_next_page: no | ||
2 | test_url: http://www.crimemagazine.com/son-sam \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crimethinc.com.txt b/inc/3rdparty/site_config/standard/crimethinc.com.txt deleted file mode 100755 index b5a8018a..00000000 --- a/inc/3rdparty/site_config/standard/crimethinc.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class="readingtext"] | ||
2 | title: substring-after(substring-after(//title, ':'), ':') | ||
3 | test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crn.de.txt b/inc/3rdparty/site_config/standard/crn.de.txt deleted file mode 100755 index 61d5d6a7..00000000 --- a/inc/3rdparty/site_config/standard/crn.de.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: //p[contains(@class,'author')]/a | ||
2 | date: //div[contains(@class,'date')] | ||
3 | test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/csmonitor.com.txt b/inc/3rdparty/site_config/standard/csmonitor.com.txt deleted file mode 100755 index 70ab9885..00000000 --- a/inc/3rdparty/site_config/standard/csmonitor.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'head')] | ||
2 | |||
3 | # standard page | ||
4 | body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')] | ||
5 | # print page | ||
6 | body: //div[@id='mainColumn'] | ||
7 | |||
8 | author: //a[contains(@class, 'ui-author')] | ||
9 | |||
10 | single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')] | ||
11 | |||
12 | strip_id_or_class: storyToolbar | ||
13 | strip_id_or_class: promotion-tag | ||
14 | |||
15 | tidy: no | ||
16 | prune: no | ||
17 | |||
18 | test_url: http://www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 | ||
diff --git a/inc/3rdparty/site_config/standard/csnbayarea.com.txt b/inc/3rdparty/site_config/standard/csnbayarea.com.txt deleted file mode 100755 index 1da60b4e..00000000 --- a/inc/3rdparty/site_config/standard/csnbayarea.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@id='csn_blogST_headline']/h1 | ||
2 | |||
3 | body: //div[@id='csn_blogST_main'] | ||
4 | strip_id_or_class: ipfootnotes | ||
5 | strip: //div[@id='csn_blogST_main']/p[1]/img | ||
6 | strip: //div[@id='csn_blogST_sidebar'] | ||
7 | test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/csnphilly.com.txt b/inc/3rdparty/site_config/standard/csnphilly.com.txt deleted file mode 100755 index c14a934a..00000000 --- a/inc/3rdparty/site_config/standard/csnphilly.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | # author's name is not isolated as a tag.... ugh | ||
2 | convert_double_br_tags: yes | ||
3 | body: //csn_blogST_main | ||
4 | |||
5 | #junk above and around the article | ||
6 | strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div | ||
7 | strip: /html/body/div[4]/header | ||
8 | strip_id_or_class: article-right-sidebar | ||
9 | strip_id_or_class: rsn-gigya-sharebar-container | ||
10 | strip_id_or_class: article-bottom | ||
11 | strip_id_or_class: hider | ||
12 | strip_id_or_class: footer | ||
13 | strip_id_or_class: masthead | ||
14 | strip_id_or_class: block-menu-menu-rsn-login-or-register | ||
15 | strip_id_or_class: block-menu-menu-header-links | ||
16 | strip_id_or_class: block-rsn-follow-bar-follow-bar | ||
17 | strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard | ||
18 | strip_id_or_class: logo | ||
19 | strip_id_or_class: element-invisible | ||
20 | strip_id_or_class: site-name | ||
21 | strip: //div[contains(@style, 'none')] | ||
22 | test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/css-tricks.com.txt b/inc/3rdparty/site_config/standard/css-tricks.com.txt deleted file mode 100755 index 3d8174aa..00000000 --- a/inc/3rdparty/site_config/standard/css-tricks.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title://article[contains(@id, "post-")]/h1 | ||
2 | date://article[contains(@id, "post-")]/p[@class="time"]/time | ||
3 | body://article[contains(@id, "post-")] | ||
4 | strip://article[contains(@id, "post-")]/p[@class="time"]/time | ||
5 | prune:yes | ||
6 | test_url: http://css-tricks.com/off-canvas-menu-with-css-target/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cucharasonica.com.txt b/inc/3rdparty/site_config/standard/cucharasonica.com.txt deleted file mode 100755 index e691fe83..00000000 --- a/inc/3rdparty/site_config/standard/cucharasonica.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://cucharasonica.com/2011/09/queen-busca-candidatos-para-su-propia-banda-tributo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cw.com.tw.txt b/inc/3rdparty/site_config/standard/cw.com.tw.txt deleted file mode 100755 index 6e3a91ee..00000000 --- a/inc/3rdparty/site_config/standard/cw.com.tw.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | author://span[contains(@class,'reporter')] | ||
2 | |||
3 | date://span[contains(@class,'date')] | ||
4 | |||
5 | body://div[contains(@class,'mainContaner')] | ||
6 | |||
7 | strip://div[contains(@class,'mainHeaer')] | ||
8 | strip://div[contains(@class,'keyW')] | ||
9 | strip://div[contains(@class,'wonderful')] | ||
10 | strip://div[contains(@class,'pages')] | ||
11 | strip://div[contains(@class,'Topics TopicsW3')] | ||
12 | |||
13 | next_page_link://li[@class='pageNext']/a[contains(.,'下一頁')] | ||
14 | test_url: http://www.cw.com.tw/article/article.action?id=5032848 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt deleted file mode 100755 index 2bd66be8..00000000 --- a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | single_page_link: //a | ||
2 | tidy: no | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm | ||
diff --git a/inc/3rdparty/site_config/standard/dagogtid.no.txt b/inc/3rdparty/site_config/standard/dagogtid.no.txt deleted file mode 100755 index 1531472c..00000000 --- a/inc/3rdparty/site_config/standard/dagogtid.no.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //span[@class = 'overskriftEkstrastor'] | ||
2 | author: //em/a | ||
3 | |||
4 | test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailydot.com.txt b/inc/3rdparty/site_config/standard/dailydot.com.txt deleted file mode 100755 index 978ed1ce..00000000 --- a/inc/3rdparty/site_config/standard/dailydot.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | tidy: no | ||
2 | body: //article | ||
3 | |||
4 | test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailykos.com.txt b/inc/3rdparty/site_config/standard/dailykos.com.txt deleted file mode 100755 index 6d4cb82a..00000000 --- a/inc/3rdparty/site_config/standard/dailykos.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@id='article-1']//div[contains(@class, 'article-body')] | ||
2 | title: //div[@class='meta']//a[@id='titleHref'] | ||
3 | date: //div[@class='meta']//p[@class='date'] | ||
4 | |||
5 | strip_id_or_class: invisible | ||
6 | strip_id_or_class: divider-doodle | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrich-s-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife | ||
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt deleted file mode 100755 index cd29a4d4..00000000 --- a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@id='js-article-text'] | ||
2 | strip: //div[@class='explore-links'] | ||
3 | strip: //div[@id='js-article-text']/br[position()=1] | ||
4 | strip_id_or_class: print-or-mail-links | ||
5 | strip_id_or_class: shareArticles | ||
6 | strip_id_or_class: googleAds | ||
7 | strip_id_or_class: digg-button | ||
8 | strip_id_or_class: article-icon-links-container | ||
9 | strip_id_or_class: clickToEnlarge | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt deleted file mode 100755 index 3b153042..00000000 --- a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='ec-blog-headline'] | ||
2 | body: //*[@id="divDetails"] | ||
3 | date: //*[@id="ctl00_ContentPlaceHolder1_tdDate"] | ||
4 | author: //*[@id="ctl00_ContentPlaceHolder1_anchorAuthor"]/a | ||
5 | autodetect_next_page: no | ||
6 | test_url: http://dailystar.com.lb/Opinion/Columnist/2012/Oct-10/190803-americas-new-modesty-in-the-mideast.ashx#axzz2928JP5xE \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/danleech.com.txt b/inc/3rdparty/site_config/standard/danleech.com.txt deleted file mode 100755 index 1d4cec77..00000000 --- a/inc/3rdparty/site_config/standard/danleech.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article//time[@pubdate] | ||
4 | title: //article/h1//span[contains(@class, 'entry-title')] | ||
5 | body: //article/div[contains(@class, 'entry-content')] | ||
6 | test_url: http://danleech.com/post/36822126876/simple-icons \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dansdata.com.txt b/inc/3rdparty/site_config/standard/dansdata.com.txt deleted file mode 100755 index 60669480..00000000 --- a/inc/3rdparty/site_config/standard/dansdata.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | autodetect_next_page: no | ||
2 | tidy: no | ||
3 | prune: no | ||
4 | body: //div[@class='NoOverflow'] | ||
5 | test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dantri.com.vn.txt b/inc/3rdparty/site_config/standard/dantri.com.vn.txt deleted file mode 100755 index f19fee7c..00000000 --- a/inc/3rdparty/site_config/standard/dantri.com.vn.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'fon31 mt2')] | ||
2 | body: //h2[contains(@class, 'fon33 mt1')] | //div[contains(@class, 'fon34 mt3')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://dantri.com.vn/su-kien/chang-trai-mot-minh-dap-xe-vuot-450km-de-vieng-mo-dai-tuong-869763.htm | ||
7 | test_url: http://dantri.com.vn/trangchu.rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/daringfireball.net.txt b/inc/3rdparty/site_config/standard/daringfireball.net.txt deleted file mode 100755 index 251cc670..00000000 --- a/inc/3rdparty/site_config/standard/daringfireball.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class="article"]/h1 | ||
2 | author: //div[@id="Sidebar"]/p/strong | ||
3 | date: //h6[@class="dateline"] | ||
4 | body: //div[@class="article"] | ||
5 | strip: //h6[@class="dateline"] | ||
6 | strip: //div[@class="article"]/h1 | ||
7 | test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/datanami.com.txt b/inc/3rdparty/site_config/standard/datanami.com.txt deleted file mode 100755 index e9111a48..00000000 --- a/inc/3rdparty/site_config/standard/datanami.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id="article"] | ||
2 | date: //p[@class="date"] | ||
3 | author: //p[@class="byline"] | ||
4 | test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dcurt.is.txt b/inc/3rdparty/site_config/standard/dcurt.is.txt deleted file mode 100755 index 524c4bf1..00000000 --- a/inc/3rdparty/site_config/standard/dcurt.is.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: (//article//h2)[1] | ||
2 | body: //article[contains(@class, 'post')] | ||
3 | date: //time[@id='top_time']/@datetime | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | test_url: http://dcurt.is/predictions-txt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/defomicron.net.txt b/inc/3rdparty/site_config/standard/defomicron.net.txt deleted file mode 100755 index 9f11258c..00000000 --- a/inc/3rdparty/site_config/standard/defomicron.net.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //article/h1 | ||
2 | author: //hgroup/h3/a | ||
3 | date: //time | ||
4 | body: //article | ||
5 | strip: //aside | ||
6 | footnotes: yes | ||
7 | prune: no | ||
8 | tidy: no | ||
9 | test_url: https://defomicron.net/2012/09/ios-6/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/delong.typepad.com.txt b/inc/3rdparty/site_config/standard/delong.typepad.com.txt deleted file mode 100755 index c4b922e4..00000000 --- a/inc/3rdparty/site_config/standard/delong.typepad.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | strip_id_or_class: banner | ||
2 | strip_id_or_class: gamma | ||
3 | strip_id_or_class: module-list | ||
4 | test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/democracynow.org.txt b/inc/3rdparty/site_config/standard/democracynow.org.txt deleted file mode 100755 index b0050b4f..00000000 --- a/inc/3rdparty/site_config/standard/democracynow.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, 'blog_body')] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/derstandard.at.txt b/inc/3rdparty/site_config/standard/derstandard.at.txt deleted file mode 100755 index 07db3521..00000000 --- a/inc/3rdparty/site_config/standard/derstandard.at.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //div[@id='artikelHeader']/h1 | ||
2 | author: //span[@class='author'] | ||
3 | date: //span[@class='date'] | ||
4 | body: //div[@class='copytext'] | ||
5 | strip: //ul[@class='lookupLinksArtikel'] | ||
6 | |||
7 | strip: //div[@id='pageTop'] | ||
8 | strip: //div[@id='toolbar'] | ||
9 | strip: //div[@id='articleTools'] | ||
10 | strip: //div[@id='weiterlesen'] | ||
11 | strip: //div[@id='communityCanvas'] | ||
12 | |||
13 | test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/designsponge.com.txt b/inc/3rdparty/site_config/standard/designsponge.com.txt deleted file mode 100755 index 2cd2f1f6..00000000 --- a/inc/3rdparty/site_config/standard/designsponge.com.txt +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | # Set title | ||
7 | title: //header/h1 | ||
8 | |||
9 | # Set author | ||
10 | author: //a[@rel='author'] | ||
11 | |||
12 | # Content is here | ||
13 | body: //article | ||
14 | |||
15 | # Tidy up before article | ||
16 | strip: //header | ||
17 | |||
18 | # Tidy up article | ||
19 | strip: //div[contains(@id, 'gallery-')] | ||
20 | replace_string(<a rel="attachment): <p rel="attachment | ||
21 | |||
22 | |||
23 | # Tidy up after article | ||
24 | strip: //div[@class='sm'] | ||
25 | strip_id_or_class: related | ||
26 | strip_id_or_class: comments | ||
27 | strip: //footer | ||
28 | |||
29 | # Try it yourself | ||
30 | test_url: http://www.designsponge.com/2010/06/seattle-design-guide.html | ||
31 | test_url: http://www.designsponge.com/2012/04/sneak-peek-liz-cook.html | ||
diff --git a/inc/3rdparty/site_config/standard/designtagebuch.de.txt b/inc/3rdparty/site_config/standard/designtagebuch.de.txt deleted file mode 100755 index 9020847f..00000000 --- a/inc/3rdparty/site_config/standard/designtagebuch.de.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | tidy: no | ||
2 | body: //div[@class='main'] | ||
3 | |||
4 | author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am') | ||
5 | date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ') | ||
6 | |||
7 | strip_id_or_class: pagelink | ||
8 | strip_id_or_class: wp-polls | ||
9 | |||
10 | next_page_link: //div[@class='post-page-next']/a | ||
11 | test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/desitvforum.net.txt b/inc/3rdparty/site_config/standard/desitvforum.net.txt deleted file mode 100755 index c77007b7..00000000 --- a/inc/3rdparty/site_config/standard/desitvforum.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
2 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
3 | |||
4 | prune: no | ||
5 | tidy: no | ||
6 | |||
7 | test_url: http://www.desitvforum.net/forum/watch-online/431739-creature-3d-2014-watch-online-download-dvd-rip.html | ||
diff --git a/inc/3rdparty/site_config/standard/details.com.txt b/inc/3rdparty/site_config/standard/details.com.txt deleted file mode 100755 index d1d8a29a..00000000 --- a/inc/3rdparty/site_config/standard/details.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class="content-headline"] | ||
2 | body: //div[@class="headers-container"] | //div[@class="content-container"] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | single_page_link: //li[@class='utility-print']/a | ||
7 | |||
8 | test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/deutsche-apotheker-zeitung.de.txt b/inc/3rdparty/site_config/standard/deutsche-apotheker-zeitung.de.txt deleted file mode 100755 index 36709cab..00000000 --- a/inc/3rdparty/site_config/standard/deutsche-apotheker-zeitung.de.txt +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | prune: yes | ||
4 | tidy: yes | ||
5 | |||
6 | title: //h1 | ||
7 | date: //p[@class='news_datum'] | ||
8 | author: //span[@class='author'] | ||
9 | |||
10 | body: //div[@class='tagesnews-content'] | ||
11 | |||
12 | # General cleanup | ||
13 | strip_id_or_class: dachzeile | ||
14 | strip: //h3 | ||
15 | strip: //p[@class='bodytext']//a | ||
16 | strip_id_or_class: autor_datum | ||
17 | strip_id_or_class: comments | ||
18 | strip_id_or_class: banner- | ||
19 | |||
20 | strip: //p[contains(., 'Lesen Sie')] | ||
21 | strip: //p[contains(., '– in DAZ')] | ||
22 | |||
23 | # Fix image captions | ||
24 | replace_string(<p class="image_caption">): <p><small><em> | ||
25 | replace_string(</dd>): </em></small></dd> | ||
26 | |||
27 | test_url: http://www.deutsche-apotheker-zeitung.de/pharmazie/news/2014/09/03/weniger-nebenwirkungen-aber-kein-zusatznutzen/13715.html | ||
28 | test_url: http://www.deutsche-apotheker-zeitung.de/recht/news/2014/09/02/urteile-zum-cannabis-eigenanbau-bfarm-geht-in-berufung/13716.html | ||
29 | |||
diff --git a/inc/3rdparty/site_config/standard/developers.facebook.com.txt b/inc/3rdparty/site_config/standard/developers.facebook.com.txt deleted file mode 100755 index 7609b72f..00000000 --- a/inc/3rdparty/site_config/standard/developers.facebook.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //div[@class="bodyText"]/h1 | ||
2 | author: //div[@class="picture"]/a/img/@alt | ||
3 | test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt deleted file mode 100755 index 6f1d4e27..00000000 --- a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | date: //h2[@class='date-header'] | ||
2 | body: //div[@class='post hentry'] | ||
3 | title: //h3 | ||
4 | strip: //div[@class='post-footer'] | ||
5 | |||
6 | test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt deleted file mode 100755 index b8243d0c..00000000 --- a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[contains(@class, 'source-data')] | ||
2 | strip: //button | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://dictionary.reference.com/browse/propaganda | ||
diff --git a/inc/3rdparty/site_config/standard/diepresse.com.txt b/inc/3rdparty/site_config/standard/diepresse.com.txt deleted file mode 100755 index ced189cc..00000000 --- a/inc/3rdparty/site_config/standard/diepresse.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='article']/h1 | ||
2 | date: substring-before(//p[@class='articletime'],'|') | ||
3 | body: //div[@id='articletext'] | ||
4 | strip: //div[@class='inlineDiashow'] | ||
5 | |||
6 | test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt deleted file mode 100755 index 80ce5ff3..00000000 --- a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # default parser works great | ||
2 | # only add "author" and "next page link" reference | ||
3 | # 2012-04-13 | ||
4 | |||
5 | next_page_link: //div[@class = 'pagination']/a[@class = 'next_page'] | ||
6 | |||
7 | author: //*[@class = 'author metadata']/a | ||
8 | test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt deleted file mode 100755 index 18ce370e..00000000 --- a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='post-title']/h1 | ||
2 | author: //a[@href='#author'] | ||
3 | body: //div[@class='post-content'] | ||
4 | strip: //div[@class='post-meta'] | ||
5 | |||
6 | test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt deleted file mode 100755 index f48bdfdb..00000000 --- a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class="article_header"]/h1 | ||
2 | date: //div[@class="article_pub"]/span[@class="time"] | ||
3 | author: //div[@class="article_pub"]/span[@class="editors"]/a/text() | ||
4 | body: //div[@class="article_body clear_left"] | ||
5 | test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt deleted file mode 100755 index 85cc78e5..00000000 --- a/inc/3rdparty/site_config/standard/dilbert.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) | ||
2 | title: //div[contains(@class, 'SB_Title')]//a | ||
3 | body: //div[contains(@class, 'STR_Image')] | ||
4 | body: //*[contains(@class, 'SB_Content')] | ||
5 | author: string('Scott Adams') | ||
6 | date: //*[contains(@class, 'SB_Detail')]/text()[1] | ||
7 | |||
8 | |||
9 | test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ | ||
10 | test_url: http://dilbert.com/strips/comic/2013-10-22 | ||
11 | test_url: http://feed.dilbert.com/dilbert/daily_strip \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dinamalar.com.txt b/inc/3rdparty/site_config/standard/dinamalar.com.txt deleted file mode 100755 index bc315cf1..00000000 --- a/inc/3rdparty/site_config/standard/dinamalar.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //div[@class='newsdetbd'] | ||
2 | body: //div[@id='innerleft'] | ||
3 | #//p[@class = 'plnht'] | ||
4 | strip_image_src: /albums/ | ||
5 | strip: //div[@class='mrrt'] | ||
6 | prune: yes | ||
7 | strip_id_or_class: 'fdpd' | ||
8 | strip_id_or_class: 'epapt' | ||
9 | strip_id_or_class: 'newsrtwd' | ||
10 | strip_id_or_class: 'padtp' | ||
11 | strip_id_or_class: 'newdt' | ||
12 | strip_id_or_class: 'newdlt' | ||
13 | strip: //div[@id='selNotes'] | ||
14 | strip_id_or_class: 'clsNotes' | ||
15 | strip_id_or_class: 'clear' | ||
16 | strip_id_or_class: 'cmtwrap' | ||
17 | strip_id_or_class: 'sess' | ||
18 | strip_id_or_class: 'parents' | ||
19 | test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dn.pt.txt b/inc/3rdparty/site_config/standard/dn.pt.txt deleted file mode 100755 index 051b8cb9..00000000 --- a/inc/3rdparty/site_config/standard/dn.pt.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | single_page_link: concat('http://www.dn.pt/Common/print.aspx?content_id=', //input[@type='hidden' and @name='link-comments']/@value) | ||
2 | #<input type="hidden" name="link-comments" class="link-comments" value="3972244"> | ||
3 | |||
4 | title: //h1 | ||
5 | author: //div[@class="Author"] | ||
6 | |||
7 | strip: //div[@class="Patrocinio"] | ||
8 | |||
9 | test_url: http://www.dn.pt/inicio/opiniao/interior.aspx?content_id=3972244&seccao=Alberto%20Gon%E7alves&tag=Opini%E3o%20-%20Em%20Foco&page=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt deleted file mode 100755 index 5283a0cd..00000000 --- a/inc/3rdparty/site_config/standard/dn.se.txt +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | # Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height. | ||
2 | |||
3 | body: //div[@id="article-content"] | ||
4 | |||
5 | |||
6 | # Ads | ||
7 | strip_id_or_class: advert-space | ||
8 | |||
9 | # Read more, recommend, comments etc | ||
10 | strip_id_or_class: fbc-recommend | ||
11 | strip_id_or_class: recommend | ||
12 | strip_id_or_class: article-readers | ||
13 | strip_id_or_class: article-addons | ||
14 | strip_id_or_class: hook | ||
15 | strip_id_or_class: right | ||
16 | strip_id_or_class: footer | ||
17 | |||
18 | # Other news | ||
19 | strip: //div[@id="mirrors"] | ||
20 | |||
21 | # Author | ||
22 | author: //div[@id="byline"]/div/p/strong | ||
23 | |||
24 | # Date | ||
25 | date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) | ||
26 | |||
27 | test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade | ||
28 | test_url: http://www.dn.se/m/rss/senaste-nytt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt deleted file mode 100755 index 972293bc..00000000 --- a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //*[@class="news"]//h1[@class="title"] | ||
2 | author: //*[@class="news"]//*[@class="newsInfo"]/a | ||
3 | date: substring-before(//*[@class="news"]//*[@class="newsInfo"]/text(), ',') | ||
4 | body: //*[@class="news"]//*[@class="newsContent"] | ||
5 | footnotes: no | ||
6 | test_url: http://www.dobreprogramy.pl/Sony-konczy-z-Foldinghome-na-PS3,Aktualnosc,36899.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/doctac.com.txt b/inc/3rdparty/site_config/standard/doctac.com.txt deleted file mode 100755 index 1c518a9b..00000000 --- a/inc/3rdparty/site_config/standard/doctac.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | strip: //*[(@id = "featured")] | ||
2 | |||
3 | author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') | ||
4 | |||
5 | date: concat(//div[@class='month'],' ',//div[@class='day']) | ||
6 | |||
7 | #doctac doesn't provide a year, but month/day is better than nothing | ||
8 | test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt deleted file mode 100755 index 20566ee3..00000000 --- a/inc/3rdparty/site_config/standard/domusweb.it.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # TODO: clean up the extra junk at the end of articles | ||
2 | |||
3 | # general text formatting | ||
4 | prune: no | ||
5 | convert_double_br_tags:yes | ||
6 | |||
7 | # where to find the basic metadata | ||
8 | author://a[@class='articleauthor'] | ||
9 | date://a[starts-with(@href,'/en/search/published/')] | ||
10 | title:substring-before(//h2[@class='title'],'—') | ||
11 | body://div[@id='maincontainer'] | ||
12 | |||
13 | dissolve://div[starts-with(@id,'commentableblock')] | ||
14 | |||
15 | # clean up the crap | ||
16 | strip://div[contains(@class,'domusnetwork')] | ||
17 | strip://div[contains(@class,'relative_wrapper')] | ||
18 | |||
19 | strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] | ||
20 | wrap_in(em): //div[contains(@class,'captionsubimage')]/span | ||
21 | test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dou.ua.txt b/inc/3rdparty/site_config/standard/dou.ua.txt deleted file mode 100755 index 0f983112..00000000 --- a/inc/3rdparty/site_config/standard/dou.ua.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@itemprop="name"] | ||
2 | |||
3 | author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a | ||
4 | |||
5 | date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')] | ||
6 | |||
7 | body: //div[contains(@class, 'b-typo')] | ||
8 | test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/douban.com.txt b/inc/3rdparty/site_config/standard/douban.com.txt deleted file mode 100755 index d72a2223..00000000 --- a/inc/3rdparty/site_config/standard/douban.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.douban.com/note/215003067/ | ||
3 | # http://www.douban.com/note/213540049/ | ||
4 | # http://www.douban.com/group/topic/31140104/ | ||
5 | |||
6 | title: //div[@class='note-header']/h1 | ||
7 | title: //div[@id='content']/h1 | ||
8 | |||
9 | author: //div[@class='info']/ul/li/a | ||
10 | author: //h3/span/a | ||
11 | |||
12 | date://div[@class='note-header']/div/span | ||
13 | date://h3/span[contains(@class, 'color-green')] | ||
14 | |||
15 | body://div[contains(@class, 'note')] | ||
16 | body://div[contains(@class, 'topic-content')] | ||
17 | |||
18 | strip://h3 | ||
19 | |||
20 | convert_double_br_tags: yes | ||
21 | test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dpreview.com.txt b/inc/3rdparty/site_config/standard/dpreview.com.txt deleted file mode 100755 index 001c810f..00000000 --- a/inc/3rdparty/site_config/standard/dpreview.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # next_page_link for product review | ||
2 | # example: http://www.dpreview.com/reviews/lytro/ | ||
3 | next_page_link: //img[@alt = 'Next page']/../@href | ||
4 | |||
5 | # next_page_link for other articles | ||
6 | # example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 | ||
7 | next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a | ||
8 | single_page_link: //a[contains(.,'Print view')] | ||
9 | test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dr.dk.txt b/inc/3rdparty/site_config/standard/dr.dk.txt deleted file mode 100755 index d8ec1acf..00000000 --- a/inc/3rdparty/site_config/standard/dr.dk.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: //div[@class='articleFunctions']//a | ||
3 | date: //meta[@name='pubdate']/@content | ||
4 | |||
5 | # Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason) | ||
6 | body: //div[@class='articleContent'] | ||
7 | |||
8 | tidy: no | ||
9 | test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dramasonline.com.txt b/inc/3rdparty/site_config/standard/dramasonline.com.txt deleted file mode 100755 index 4898353b..00000000 --- a/inc/3rdparty/site_config/standard/dramasonline.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@class='postext'] | ||
2 | |||
3 | strip_id_or_class: ratingblock | ||
4 | strip_id_or_class: hreview-aggregate | ||
5 | strip: //div[contains(@style, 'display: none;')] | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/drdobbs.com.txt b/inc/3rdparty/site_config/standard/drdobbs.com.txt deleted file mode 100755 index b1a9db6f..00000000 --- a/inc/3rdparty/site_config/standard/drdobbs.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | single_page_link: //a[contains(@href, '/article/print')] | ||
2 | test_url: http://www.drdobbs.com/architecture-and-design/240001128 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/drive2.ru.txt b/inc/3rdparty/site_config/standard/drive2.ru.txt deleted file mode 100755 index d500cb81..00000000 --- a/inc/3rdparty/site_config/standard/drive2.ru.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@class = "description"] | ||
2 | body: //div[@id = "post"] | ||
3 | |||
4 | strip_id_or_class: vcard | ||
5 | strip_id_or_class: journallist | ||
6 | strip_id_or_class: infobox | ||
7 | strip_id_or_class: terms | ||
8 | strip_id_or_class: replieslist | ||
9 | strip_id_or_class: communityside | ||
10 | |||
11 | |||
12 | test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dropbox.com.txt b/inc/3rdparty/site_config/standard/dropbox.com.txt deleted file mode 100755 index 3b51569f..00000000 --- a/inc/3rdparty/site_config/standard/dropbox.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //a[@id='download_button_link'] | ||
2 | |||
3 | test_url: https://www.dropbox.com/s/qmocfrco2t0d28o/Fluffbeast.docx | ||
diff --git a/inc/3rdparty/site_config/standard/drupal.org.txt b/inc/3rdparty/site_config/standard/drupal.org.txt deleted file mode 100755 index 2da3eb1c..00000000 --- a/inc/3rdparty/site_config/standard/drupal.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title://h1 | ||
2 | author://div[@class="submitted"]/a | ||
3 | date:substring-after(//div[@class="meta"],'modified: ') | ||
4 | date:substring-after(//div[@class="submitted"],'on ') | ||
5 | body://div[@class="node-content"] | ||
6 | strip://div[@class="meta"] | ||
7 | strip_id_or_class:book-navigation | ||
8 | test_url: http://drupal.org/node/1327354 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt deleted file mode 100755 index 2978797e..00000000 --- a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h2/a | ||
2 | author: substring-before(substring-after(//span[@class='byline'], 'by'), ',') | ||
3 | date: substring-before(substring-after(//span[@class='byline'], ','), '|') | ||
4 | body: //div[@class='entry'] | ||
5 | |||
6 | |||
7 | # strip out auction stuff at the end of posts | ||
8 | # tidy kills the center tag, so disable it | ||
9 | tidy: no | ||
10 | strip: //center//table | ||
11 | test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dushumashang.com.txt b/inc/3rdparty/site_config/standard/dushumashang.com.txt deleted file mode 100755 index 6a50a77e..00000000 --- a/inc/3rdparty/site_config/standard/dushumashang.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.dushumashang.com/2389 | ||
3 | # http://www.dushumashang.com/2415 | ||
4 | # http://www.dushumashang.com/2355 | ||
5 | |||
6 | body://div[@class='main_content'] | ||
7 | #body://section[@class='entry_content fl'] | ||
8 | title://h2 | ||
9 | author://span[@class='article_author']/a | ||
10 | date://span[@class='pub_date']/time | ||
11 | |||
12 | strip://span[@class='article_author'] | ||
13 | strip://span[@class='pub_date'] | ||
14 | strip://div[@class='page_turn'] | ||
15 | strip://span[@class='source_link']/em | ||
16 | wrap_in(strong)://span[@class='source_link']/a | ||
17 | test_url: http://www.dushumashang.com/2355 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dvice.com.txt b/inc/3rdparty/site_config/standard/dvice.com.txt deleted file mode 100755 index 1a1990ee..00000000 --- a/inc/3rdparty/site_config/standard/dvice.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | strip://*[@id = 'blog_top_stories'] | ||
2 | strip://*[@id = 'takeover_off'] | ||
3 | strip://*[@id = 'right_gray_box'] | ||
4 | strip://*[@class = 'blog_topics'] | ||
5 | strip://*[@class = 'section_titles'] | ||
6 | |||
7 | author://div[@class = 'post_author_info']/a | ||
8 | date://div[@class = 'post_date_info'] | ||
9 | test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt deleted file mode 100755 index 89a68bcd..00000000 --- a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div [@class="post contain"]/h1 | ||
2 | strip: //div [@class="post contain"]/h1 | ||
3 | body: //div [@class="post contain"] | ||
4 | author: substring-before(//title, ':') | ||
5 | author: substring-before(//title, ' ') | ||
6 | |||
7 | |||
8 | test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt deleted file mode 100755 index ba9d312d..00000000 --- a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | date: //div[@class="et_dateUnderTitle"] | ||
3 | author: substring-after(//div[@class="et_authorUnderTitle"], 'By ') | ||
4 | body: //div[@id="et_leftCol640split"] | ||
5 | |||
6 | strip: //div[@id="et_leftCol640splitRight"] | ||
7 | strip: //div[@class="et_light_greybgboxlower"] | ||
8 | test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt deleted file mode 100755 index 36708da3..00000000 --- a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //div[@class='title_text'] | ||
2 | |||
3 | author: //div[@class='author_text'] | ||
4 | |||
5 | body: //div[@class='story_text']/.. | ||
6 | |||
7 | strip: //b | ||
8 | |||
9 | strip_id_or_class: back_to_top | ||
10 | strip_id_or_class: author_text | ||
11 | strip_id_or_class: title_text | ||
12 | |||
13 | wrap_in(center): //a | ||
14 | |||
15 | dissolve: //a | ||
16 | |||
17 | footnotes: no | ||
18 | test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ebay.com.txt b/inc/3rdparty/site_config/standard/ebay.com.txt deleted file mode 100755 index f17e1f72..00000000 --- a/inc/3rdparty/site_config/standard/ebay.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] | ||
2 | |||
3 | strip_image_src: imgLoading_30x30.gif | ||
4 | |||
5 | test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ecetia.com.txt b/inc/3rdparty/site_config/standard/ecetia.com.txt deleted file mode 100755 index d67e9103..00000000 --- a/inc/3rdparty/site_config/standard/ecetia.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://ecetia.com/2011/09/vida-de-jugon-vii-las-tres-es \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/echo-online.de.txt b/inc/3rdparty/site_config/standard/echo-online.de.txt deleted file mode 100755 index e53de23e..00000000 --- a/inc/3rdparty/site_config/standard/echo-online.de.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | # Author: Marvin Dickhaus <github@marvindickhaus.de> | ||
2 | # 2014-10-08 | ||
3 | |||
4 | #Tidy just messes up the DOM | ||
5 | tidy: no | ||
6 | |||
7 | title: //h1 | ||
8 | body: //h2 | //div[@id='artikelteaser'] | //div[@id='artikeltext'] | ||
9 | |||
10 | #Strip | ||
11 | strip_image_src: artikel_a_merken.gif | ||
12 | strip: //div[@class='zusatzinfo'] | ||
13 | |||
14 | #Author: substring is used to remove the " Von " prefix. | ||
15 | author: substring(//li[@class='artikelautor'], 5) | ||
16 | |||
17 | date: //li[@class='artikeldatum'] | ||
18 | |||
19 | #The first two URLs will at some point no longer show | ||
20 | #the full article. There is a time-based paywall | ||
21 | #installed. Using the feed should present valid output | ||
22 | test_url: http://www.echo-online.de/art1231,5503063 | ||
23 | test_url: http://www.echo-online.de/art1168,5502598 | ||
24 | test_url: http://www.echo-online.de/rss/darmstadt.xml | ||
diff --git a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt deleted file mode 100755 index 729affd4..00000000 --- a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class="title"] | ||
2 | author: //div[@class="hosted"]/a | ||
3 | date: substring-after(//div[@class="dateline"]/text(), '|') | ||
4 | |||
5 | strip: //a[@class="top" and @href="#"] | ||
6 | test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt deleted file mode 100755 index 936a191d..00000000 --- a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | date: //div[@class="bb-md-noticia-fecha"] | ||
2 | body: //div[@class="corpo"] | ||
3 | dissolve: //div[@class="bb-md-noticia-extras"] | ||
4 | strip: //strong | ||
5 | strip_id_or_class: bb-md-noticia-foto-autor | ||
6 | strip_id_or_class: bb-md-noticia-foto-bajada | ||
7 | test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/economist.com.txt b/inc/3rdparty/site_config/standard/economist.com.txt deleted file mode 100755 index 8db5fdd6..00000000 --- a/inc/3rdparty/site_config/standard/economist.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | body: //div[@class='main-content'] | ||
2 | body: //article[contains(@class, 'resp-node')] | ||
3 | date: //time[@class='date-created'] | ||
4 | strip: //aside | ||
5 | prune: no | ||
6 | |||
7 | autodetect_next_page: no | ||
8 | |||
9 | test_url: http://www.economist.com/node/21528429 | ||
10 | |||
11 | test_url: http://www.economist.com/news/essays/21623373-which-something-old-and-powerful-encountered-vault | ||
12 | test_contains: the calfskin pages are smooth | ||
13 | test_contains: Books will evolve online and off | ||
diff --git a/inc/3rdparty/site_config/standard/edge-online.com.txt b/inc/3rdparty/site_config/standard/edge-online.com.txt deleted file mode 100755 index cf585815..00000000 --- a/inc/3rdparty/site_config/standard/edge-online.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')] | ||
3 | date: //time[@pubdate]/@datetime | ||
4 | author: //span[@class='author-name'] | ||
5 | prune: no | ||
6 | tidy: no | ||
7 | strip: //footer | ||
8 | |||
9 | replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak --> | ||
10 | |||
11 | single_page_link: //a[contains(@href, '?page=show')] | ||
12 | |||
13 | test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edge.org.txt b/inc/3rdparty/site_config/standard/edge.org.txt deleted file mode 100755 index 95805f6e..00000000 --- a/inc/3rdparty/site_config/standard/edge.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class='HomeLeftPannel IMGCTRL']/h2 | ||
2 | body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc'] | ||
3 | tidy: no | ||
4 | |||
5 | test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt deleted file mode 100755 index 6d5f170a..00000000 --- a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='singlePage']//h2 | ||
2 | body: //div[@id='singlePage']//div[contains(@class, 'post')] | ||
3 | strip: //a[@title='Email This Story'] | ||
4 | strip_id_or_class: sociable | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://edition.channel5belize.com/archives/86016 | ||
9 | test_url: http://edition.channel5belize.com/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edition.cnn.com.txt b/inc/3rdparty/site_config/standard/edition.cnn.com.txt deleted file mode 100755 index 6fc82d24..00000000 --- a/inc/3rdparty/site_config/standard/edition.cnn.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] | ||
2 | strip: //a[starts-with(@name, 'em')] | ||
3 | strip: //div[@id='cnnCVP2'] | ||
4 | strip_id_or_class: cnn_strylftcexpbx | ||
5 | strip_id_or_class: cnn_strylctcqrelt | ||
6 | strip_id_or_class: cnn_strybtntoolsbttm | ||
7 | strip_id_or_class: cnn_stryftsbttm | ||
8 | strip_id_or_class: cnn_strybtmcntnt | ||
9 | strip_id_or_class: cnn_stryshrwdgtbtm | ||
10 | strip_id_or_class: cnnGalleryContainer | ||
11 | strip_id_or_class: cnn_strycrcntr | ||
12 | strip_id_or_class: cnn_html_slideshow | ||
13 | prune: no | ||
14 | |||
15 | test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html | ||
16 | test_url: http://edition.cnn.com/2013/08/15/world/africa/nigeria-boko-haram-commander-killed/index.html?eref=edition | ||
17 | test_url: http://rss.cnn.com/rss/edition.rss | ||
18 | test_url: http://rss.cnn.com/rss/edition_technology.rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eetimes.com.txt b/inc/3rdparty/site_config/standard/eetimes.com.txt deleted file mode 100755 index 300db307..00000000 --- a/inc/3rdparty/site_config/standard/eetimes.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[contains(@class, 'grayshowlinks')] | ||
2 | |||
3 | next_page_link: //div[@id='sitecontentcol']//a[.='Next >'] | ||
4 | # Doesn't work (site doesn't always load full content in print view) | ||
5 | #single_page_link: //div[@id='sitecontentcol']//a[contains(@href, 'print=yes')] | ||
6 | |||
7 | test_url: http://www.eetimes.com/document.asp?doc_id=1319966& | ||
8 | test_url: http://www.eetimes.com/rss_simple.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ekultura.hu.txt b/inc/3rdparty/site_config/standard/ekultura.hu.txt deleted file mode 100755 index 3756027c..00000000 --- a/inc/3rdparty/site_config/standard/ekultura.hu.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@class='style6 nevek'] | ||
2 | |||
3 | body: //div[@class='bal3'] | ||
4 | |||
5 | |||
6 | prune: yes | ||
7 | |||
8 | tidy: yes | ||
9 | convert_double_br_tags: yes | ||
10 | |||
11 | test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elance.com.txt b/inc/3rdparty/site_config/standard/elance.com.txt deleted file mode 100755 index d4b0a9b8..00000000 --- a/inc/3rdparty/site_config/standard/elance.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='jobDesc-bd']/p | ||
2 | |||
3 | test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt deleted file mode 100755 index fa3892c6..00000000 --- a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | date: //time | ||
2 | title: //h1[contains(@class, "alpha")] | ||
3 | body: //article[contains(@class, "news-post")] | ||
4 | |||
5 | # fix dates - dates as they are won't work as strtotime doesn't understand format (03.28.2013) | ||
6 | replace_string(<time class="gamma">01.): <time class="gamma">January. | ||
7 | replace_string(<time class="gamma">02.): <time class="gamma">February. | ||
8 | replace_string(<time class="gamma">03.): <time class="gamma">March. | ||
9 | replace_string(<time class="gamma">04.): <time class="gamma">April. | ||
10 | replace_string(<time class="gamma">05.): <time class="gamma">May. | ||
11 | replace_string(<time class="gamma">06.): <time class="gamma">June. | ||
12 | replace_string(<time class="gamma">07.): <time class="gamma">July. | ||
13 | replace_string(<time class="gamma">08.): <time class="gamma">August. | ||
14 | replace_string(<time class="gamma">09.): <time class="gamma">September. | ||
15 | replace_string(<time class="gamma">10.): <time class="gamma">October. | ||
16 | replace_string(<time class="gamma">11.): <time class="gamma">November. | ||
17 | replace_string(<time class="gamma">12.): <time class="gamma">December. | ||
18 | |||
19 | prune: no | ||
20 | |||
21 | test_url: http://elderscrollsonline.com/en/rss | ||
22 | test_url: http://elderscrollsonline.com/en/news/post/2013/03/27/developer-question-of-the-week-17 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elektroniknet.de.txt b/inc/3rdparty/site_config/standard/elektroniknet.de.txt deleted file mode 100755 index 56fba5ff..00000000 --- a/inc/3rdparty/site_config/standard/elektroniknet.de.txt +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | title: //h1 | ||
2 | date: //div[@class='datum'] | ||
3 | single_page_link: //a[contains(@href, '?type=99')] | ||
4 | |||
5 | # this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1 | ||
6 | dissolve: //div[@class='artikelMeldung'] | ||
7 | |||
8 | |||
9 | strip_id_or_class: anzeige | ||
10 | strip_id_or_class: top_page_navigation | ||
11 | strip_id_or_class: cr_image_container | ||
12 | strip_id_or_class: cr_image_reference | ||
13 | strip_id_or_class: cr_image_icon | ||
14 | strip_id_or_class: _close_txt | ||
15 | strip_id_or_class: _close_ico | ||
16 | strip_id_or_class: clearer | ||
17 | |||
18 | strip://h1 | ||
19 | strip://h6 | ||
20 | strip://div[contains(@id, 'plista')] | ||
21 | strip://img[contains(@id,'tiny')] | ||
22 | strip://img[@class='cr_image'] | ||
23 | |||
24 | # strip url at the top | ||
25 | strip: //p[@style='font-size: 10px;'] | ||
26 | |||
27 | test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elmalpensante.com.txt b/inc/3rdparty/site_config/standard/elmalpensante.com.txt deleted file mode 100755 index 435c6c20..00000000 --- a/inc/3rdparty/site_config/standard/elmalpensante.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | single_page_link: //a[contains(@href, 'print_contenido')] | ||
2 | title: //h2 | ||
3 | author: //div[@class="autor"] | ||
4 | test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elpais.com.txt b/inc/3rdparty/site_config/standard/elpais.com.txt deleted file mode 100755 index c6f9787b..00000000 --- a/inc/3rdparty/site_config/standard/elpais.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title: //meta[@name='DC.title']/@content | ||
2 | title: //div[contains(@class, 'cabecera_noticia')]//h1 | ||
3 | date: //meta[@name='DC.date']/@content | ||
4 | date: //meta[@name='date']/@content | ||
5 | body: //div[@class='columna_texto'] | ||
6 | body: //div[@id='cuerpo_noticia'] | ||
7 | body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | strip_id_or_class: disposicion_vertical | ||
12 | strip_id_or_class: ampliar_foto | ||
13 | strip_id_or_class: utilidades | ||
14 | strip_id_or_class: info_relacionada | ||
15 | strip_id_or_class: m-kiosko | ||
16 | strip_id_or_class: info_complementa | ||
17 | |||
18 | strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] | ||
19 | strip: //div[@id='coment' or @id='foros_not'] | ||
20 | |||
21 | test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html | ||
22 | test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt deleted file mode 100755 index 3d1313e2..00000000 --- a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id='main-column']//div[@class='content'] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601844 | ||
6 | test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601842 | ||
7 | test_url: http://www.emaratalyoum.com/public-sports-1.533088?ot=ot.AjaxPageLayout \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/en.espnf1.com.txt b/inc/3rdparty/site_config/standard/en.espnf1.com.txt deleted file mode 100755 index 2ca0216b..00000000 --- a/inc/3rdparty/site_config/standard/en.espnf1.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | strip: //div[@class='rl'] | ||
3 | strip: //p[@class='authdesc'] | ||
4 | strip: //p[@class='strybtm'] | ||
5 | strip: //div[@id='stryFtrLft'] | ||
6 | strip: //div[@id='f1Conversation'] | ||
7 | strip: //div[@id='cmtSpncrRuler'] | ||
8 | strip: //div[@id='stryComments'] | ||
9 | strip: //div[@id='athrData'] | ||
10 | test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/engadget.com.txt b/inc/3rdparty/site_config/standard/engadget.com.txt deleted file mode 100755 index 52acddb0..00000000 --- a/inc/3rdparty/site_config/standard/engadget.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@class='post_body'] | ||
3 | date: //*[@class='post_time'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt deleted file mode 100755 index 48f301fe..00000000 --- a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class="post_content"] | ||
3 | author: //p[@class="author"]/a | ||
4 | date: //p[@class="date"] | ||
5 | strip: //h2 | ||
6 | strip: //header | ||
7 | test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt deleted file mode 100755 index 97365994..00000000 --- a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //span[@id='DetailedTitle'] | ||
2 | body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary'] | ||
3 | strip_id_or_class: sidebar | ||
4 | strip_id_or_class: Skyscrapper_Body | ||
5 | strip: //td[@class='DetailedSummary']/table[position() != 1] | ||
6 | prune: no | ||
7 | test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/enikos.gr.txt b/inc/3rdparty/site_config/standard/enikos.gr.txt deleted file mode 100755 index ddd51c4b..00000000 --- a/inc/3rdparty/site_config/standard/enikos.gr.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='article']//div[contains(@class, 'inside')] | ||
2 | |||
3 | strip_id_or_class: tags | ||
4 | strip_id_or_class: actions | ||
5 | strip_id_or_class: google-ads | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt deleted file mode 100755 index a756c457..00000000 --- a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | author://div[@class = 'article-author']/span[@class = 'byline'] | ||
2 | title://h1[@class = 'heading'] | ||
3 | body://div[@id = 'related-article-links'] | ||
4 | strip://div[@id = 'comment-sort-order'] | ||
5 | strip://div[@id = 'my-profile'] | ||
6 | strip://div[@class = 'article-author'] | ||
7 | strip://div[@class = 'bg-f8f1d8 width-385 text-left'] | ||
8 | strip://div[@id = 'login-status'] | ||
9 | strip://div[@class = 'puff-padding'] | ||
10 | test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ericsuh.com.txt b/inc/3rdparty/site_config/standard/ericsuh.com.txt deleted file mode 100755 index d25140c5..00000000 --- a/inc/3rdparty/site_config/standard/ericsuh.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | date: //h6[@class='datetime']/child::text() | ||
2 | author: string("Eric J. Suh") | ||
3 | footnotes: yes | ||
4 | test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/es.hu.txt b/inc/3rdparty/site_config/standard/es.hu.txt deleted file mode 100755 index 21691a56..00000000 --- a/inc/3rdparty/site_config/standard/es.hu.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title'])) | ||
2 | |||
3 | body: //div[@class='doc'] | ||
4 | |||
5 | prune: yes | ||
6 | |||
7 | tidy: yes | ||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')] | ||
11 | test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt deleted file mode 100755 index fd453a19..00000000 --- a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class='headline']/div[@class='name'] | ||
2 | |||
3 | strip_image_src: 'http://cdn.themis-media.com/media/global/images/library/deriv/115/115825.png' | ||
4 | |||
5 | next_page_link: //a[@class='next_page'] | ||
6 | |||
7 | strip_comments: no | ||
8 | test_url: http://www.escapistmagazine.com/articles/view/columns/criticalintel/10302-I-Hate-Magic \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/espn.go.com.txt b/inc/3rdparty/site_config/standard/espn.go.com.txt deleted file mode 100755 index 06476296..00000000 --- a/inc/3rdparty/site_config/standard/espn.go.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
2 | body: //div[contains(@class, 'article')] | ||
3 | strip: //div[contains(@class, 'mod-inline')] | ||
4 | strip: //*/span[@class='page-actions'] | ||
5 | strip: //div[@class='page-actions']/* | ||
6 | strip: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
7 | strip: //div[@class='mod-blog-navigation'] | ||
8 | strip: //div[@class='monthday'] | ||
9 | strip: //div[@class='time'] | ||
10 | strip: //div[@class='timeofday'] | ||
11 | strip: //div[contains(@class, 'mod-conversations')] | ||
12 | test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/esquire.com.txt b/inc/3rdparty/site_config/standard/esquire.com.txt deleted file mode 100755 index b9cb1e55..00000000 --- a/inc/3rdparty/site_config/standard/esquire.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@id='byline'] | ||
3 | |||
4 | body: //div[@id='printBody'] | ||
5 | |||
6 | single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/')) | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 | ||
11 | test_url: http://www.esquire.com/blogs/politics/police-getting-leftover-armoured-iraq-trucks-112513 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt deleted file mode 100755 index 9a922392..00000000 --- a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //*[@itemprop='headline'] | ||
2 | author: //*[@itemprop='author'] | ||
3 | date: //*[@itemprop='datePublished'] | ||
4 | body: //*[@itemprop='articleBody'] | ||
5 | strip: //*[contains(@class, 'instapaper_ignore')] | ||
6 | test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/etc.se.txt b/inc/3rdparty/site_config/standard/etc.se.txt deleted file mode 100755 index 95f8cf78..00000000 --- a/inc/3rdparty/site_config/standard/etc.se.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | strip_id_or_class: 'left' | ||
2 | strip_id_or_class: 'right' | ||
3 | strip_id_or_class: 'block-belowcontent' | ||
4 | author: //span[@class = 'name']/a | ||
5 | date: //div[@class= 'datum'] | ||
6 | test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt deleted file mode 100755 index bfa2c5dc..00000000 --- a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://eternabuenosaires.com/2011/09/calle-adolfo-bioy-casares \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eurogamer.net.txt b/inc/3rdparty/site_config/standard/eurogamer.net.txt deleted file mode 100755 index 8931becb..00000000 --- a/inc/3rdparty/site_config/standard/eurogamer.net.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //p[@class='strapline'] | //div[@class='cover-image'] | //article[@class='hd'] | ||
2 | strip: //div[@class='social top'] | ||
3 | strip: //p[@class='byline'] | ||
4 | |||
5 | date: //span[@itemprop='datePublished'] | ||
6 | author: //a[@itemprop='author']/text() | ||
7 | |||
8 | test_url: http://www.eurogamer.net/articles/2014-08-20-bungie-ordered-to-return-shares-to-composer-marty-odonnell | ||
9 | test_url: http://www.eurogamer.net/articles/2014-08-20-invisible-inc-does-espionage-justice | ||
diff --git a/inc/3rdparty/site_config/standard/evo.co.uk.txt b/inc/3rdparty/site_config/standard/evo.co.uk.txt deleted file mode 100755 index ccb4f879..00000000 --- a/inc/3rdparty/site_config/standard/evo.co.uk.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | author: substring-after(//div[@class='articleauthor'],'By ') | ||
2 | |||
3 | # Blog posts | ||
4 | date: //div[@class='articledate'] | ||
5 | # News | ||
6 | date: //div[@class='articledate_b'] | ||
7 | |||
8 | body: //div[@class='articletext'] | ||
9 | |||
10 | convert_double_br_tags: yes | ||
11 | test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt deleted file mode 100755 index d81d3251..00000000 --- a/inc/3rdparty/site_config/standard/expressen.se.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'b-headline_article')] | ||
2 | body: //div[contains(@class, 'b-article_print')] | ||
3 | |||
4 | single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at | ||
9 | test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/ | ||
10 | test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/extracine.com.txt b/inc/3rdparty/site_config/standard/extracine.com.txt deleted file mode 100755 index 52b598da..00000000 --- a/inc/3rdparty/site_config/standard/extracine.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://extracine.com/2011/09/straw-dogs-la-original \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/f1actual.com.txt b/inc/3rdparty/site_config/standard/f1actual.com.txt deleted file mode 100755 index 6ef2738a..00000000 --- a/inc/3rdparty/site_config/standard/f1actual.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://f1actual.com/2011/09/previo-gran-premio-de-singapur \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt deleted file mode 100755 index 26d4f905..00000000 --- a/inc/3rdparty/site_config/standard/facebook.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@id='imagestage'] | ||
2 | body: //div[contains(@class, 'userContentWrapper')] | ||
3 | |||
4 | strip_id_or_class: commentable | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | # single_page_link: replace(substring-after(//noscript//meta[@http-equiv="refresh"]/@content, 'URL='), "&", "&") | ||
10 | |||
11 | test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182 | ||
12 | test_contains: holding an extraordinary session in Brussels this month | ||
diff --git a/inc/3rdparty/site_config/standard/facta.co.jp.txt b/inc/3rdparty/site_config/standard/facta.co.jp.txt deleted file mode 100755 index 4c96a1a4..00000000 --- a/inc/3rdparty/site_config/standard/facta.co.jp.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | bosdy: //div[@class='content'] | ||
2 | |||
3 | test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt deleted file mode 100755 index 2bfcc9b4..00000000 --- a/inc/3rdparty/site_config/standard/falter.at.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //a[contains(@href, '/kategorie/autoren')] | ||
3 | date: //a[contains(@href, '/falter/ausgabe')] | ||
4 | body: //article[@class='spanMain'] | ||
5 | |||
6 | # cleanup | ||
7 | strip_id_or_class: 'respond' | ||
8 | strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] | ||
9 | strip_id_or_class: 'meta' | ||
10 | strip_id_or_class: 'servicebox' | ||
11 | strip_id_or_class: 'related' | ||
12 | strip_id_or_class: 'twitter-share-button' | ||
13 | strip: //br | ||
14 | test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fanfiction.net.txt b/inc/3rdparty/site_config/standard/fanfiction.net.txt deleted file mode 100755 index e7cab4d4..00000000 --- a/inc/3rdparty/site_config/standard/fanfiction.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //*[@id = 'story text'] | ||
2 | author: //a[starts-with(@href, '/u/')] | ||
3 | next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") | ||
4 | autodetect_next_page:yes | ||
5 | strip_id_or_class: 'a2a_kit' | ||
6 | test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt deleted file mode 100755 index a6417237..00000000 --- a/inc/3rdparty/site_config/standard/fastcompany.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //h5[@class='byline']//a | ||
3 | date: //h5[@class='date'] | ||
4 | body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] | ||
5 | strip_id_or_class: article-top-wrapper | ||
6 | strip_id_or_class: footer-message | ||
7 | strip_id_or_class: print-logo | ||
8 | strip: //cite | ||
9 | strip://*[@class='timestamp'] | ||
10 | strip://div[@id='page_right'] | ||
11 | strip://section[@id='header_region'] | ||
12 | strip://h1[@class='node-title'] | ||
13 | strip://div[@class='node-submitted'] | ||
14 | strip_id_or_class: skipnav | ||
15 | test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity | ||
16 | test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt deleted file mode 100755 index 47048a1b..00000000 --- a/inc/3rdparty/site_config/standard/faz.net.txt +++ /dev/null | |||
@@ -1,101 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | # Complete rewrite of the faz.net template as the standard one is broken | ||
3 | # I tried to consider as many page variants as possible, which was some serious work | ||
4 | |||
5 | tidy: no | ||
6 | prune: no | ||
7 | |||
8 | # Title | ||
9 | title: //p[@class='Content HeadlineShort'] | ||
10 | |||
11 | # Set author | ||
12 | author: substring-after(//span[@class='Autor'], 'von ') | ||
13 | author: //span[@class='caps last']/span[@class='caps last'] | ||
14 | author: //a[@rel='author'] | ||
15 | |||
16 | # Set date | ||
17 | date: //span[@class='Datum'] | ||
18 | date: //span[@class='Datum'],/span | ||
19 | |||
20 | # Fetch full multipage articles | ||
21 | next_page_link: //a[@title='Nächste Seite'] | ||
22 | |||
23 | # Content is here | ||
24 | body: //div[@class='Artikel'] | ||
25 | |||
26 | # Tidy up before article | ||
27 | strip: //div[@id='FAZHeaderNeu'] | ||
28 | strip: //h2[@itemprop='headline'] | ||
29 | strip: //span[@class='Datum'] | ||
30 | strip: //span[@class='Autor'] | ||
31 | strip_id_or_class: ArticlePagerTop | ||
32 | |||
33 | # General cleanup | ||
34 | strip: //div[@class='clear'] | ||
35 | strip: //a[@title='Zur Homepage FAZ.NET'] | ||
36 | strip: //iframe | ||
37 | replace_string( · ): | ||
38 | |||
39 | # Remove tracking and ads | ||
40 | strip_image_src: /l.gif? | ||
41 | strip: //div[contains(@style, 'background-image')] | ||
42 | strip: //img[@width='1'] | ||
43 | strip_id_or_class: invisible | ||
44 | strip_id_or_class: Anzeige | ||
45 | strip_id_or_class: billboard | ||
46 | |||
47 | # Remove various text boxes and social media foo | ||
48 | strip_id_or_class: WeitereBeitraege | ||
49 | strip_id_or_class: WBListe | ||
50 | strip_id_or_class: AutorenModul | ||
51 | strip_id_or_class: Community | ||
52 | strip_id_or_class: SocialMediaStatus | ||
53 | strip_id_or_class: RelatedLinkBox | ||
54 | strip_id_or_class: MultimediaNavigation | ||
55 | strip_id_or_class: IndexTitel | ||
56 | |||
57 | # Fix picture caps and pictures (use better resolution and remove clutter) | ||
58 | strip_id_or_class: LightBoxOverlay | ||
59 | strip_id_or_class: exitLarge | ||
60 | strip_id_or_class: PagerBox | ||
61 | strip_id_or_class: Bildnachweis | ||
62 | strip_id_or_class: Bildueberschrift | ||
63 | strip_id_or_class: Bildbeschreibung | ||
64 | strip_id_or_class: ArtikelBild610 | ||
65 | strip_id_or_class: MediaLink | ||
66 | strip_id_or_class: FotoBoxInnerLeft | ||
67 | strip_id_or_class: BilderRelatedLinks | ||
68 | |||
69 | # Remove clutter after article | ||
70 | strip_id_or_class: ArticlePagerBottom | ||
71 | strip_id_or_class: backToHome | ||
72 | strip_id_or_class: ArtikelAbbinder | ||
73 | strip_id_or_class: lesermeinungscontainer | ||
74 | strip_id_or_class: ThemenLinks | ||
75 | strip_id_or_class: rechtehinweis | ||
76 | strip_id_or_class: FAZArtikelMap | ||
77 | strip_id_or_class: FAZArtikelKommentare | ||
78 | strip_id_or_class: ArtikelKommentieren | ||
79 | strip_id_or_class: FAZArtikelFunktionen | ||
80 | strip_id_or_class: mailLB | ||
81 | strip_id_or_class: FAZContentRight | ||
82 | strip_id_or_class: stageModule | ||
83 | strip_id_or_class: ContentFooter | ||
84 | strip_id_or_class: ServicesFooter | ||
85 | strip_id_or_class: FAZFooter | ||
86 | |||
87 | # Clean up stuff present just in some articles | ||
88 | strip_id_or_class: Teaser620 | ||
89 | strip_id_or_class: TeaserMultimedia | ||
90 | strip_id_or_class: VideoBox | ||
91 | |||
92 | # Remove as soon as Wallabag might be able to embed flash video | ||
93 | strip_id_or_class: mmoObjectAsTeaserInArticle | ||
94 | strip_id_or_class: additionalStylesAudioVideo | ||
95 | strip_id_or_class: hideMMElements | ||
96 | |||
97 | # Try it yourself | ||
98 | test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken | ||
99 | test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html | ||
100 | test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html | ||
101 | |||
diff --git a/inc/3rdparty/site_config/standard/fertigung.de.txt b/inc/3rdparty/site_config/standard/fertigung.de.txt deleted file mode 100755 index 90145e58..00000000 --- a/inc/3rdparty/site_config/standard/fertigung.de.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | title: //title | ||
2 | |||
3 | body: //div[@id='content'] | ||
4 | |||
5 | strip: (//div[@id='content']/h2)[1] | ||
6 | |||
7 | strip: //h2[contains(., 'mehr News')]/following::* | ||
8 | strip: //h2[contains(., 'mehr News')] | ||
9 | |||
10 | strip: //div[contains(@class, 'indizar')]/following::* | ||
11 | strip: //div[contains(@class, 'indizar')] | ||
12 | |||
13 | strip: //h1[contains(@class, 'single')]/preceding::* | ||
14 | strip: //h1[contains(@class, 'single')] | ||
15 | |||
16 | strip_id_or_class: plista_widget | ||
17 | |||
18 | prune: no | ||
19 | |||
20 | next_page_link: //a[contains(., 'Weiter')] | ||
21 | |||
22 | test_url: http://www.fertigung.de/2013/04/igus-neuer-energiekettenkatalog/ | ||
23 | test_url: http://www.fertigung.de/2013/04/dynamisch-und-hochpraezise/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fictionpress.com.txt b/inc/3rdparty/site_config/standard/fictionpress.com.txt deleted file mode 100755 index 19ec16b0..00000000 --- a/inc/3rdparty/site_config/standard/fictionpress.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: id('storytext') | ||
2 | author: //a[starts-with(@href, '/u/')] | ||
3 | #next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") | ||
4 | strip_id_or_class: 'a2a_kit' | ||
5 | test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ficwad.com.txt b/inc/3rdparty/site_config/standard/ficwad.com.txt deleted file mode 100755 index 081f0bb0..00000000 --- a/inc/3rdparty/site_config/standard/ficwad.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h4 | ||
2 | author: //span[@class="author"] | ||
3 | body: //div[@id="story"] | ||
4 | strip_id_or_class: summary | ||
5 | strip_id_or_class: meta | ||
6 | strip_id_or_class: storyfoot | ||
7 | convert_double_br_tags: yes | ||
8 | prune: no | ||
9 | |||
10 | # Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface. | ||
11 | |||
12 | test_url: http://www.ficwad.com/story/158977 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt deleted file mode 100755 index 0c967db0..00000000 --- a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[@id='y-article-bd'] | ||
3 | body: //div[contains(@class, 'yom-art-content')] | ||
4 | strip: //div[contains(@class, 'related-companies')] | ||
5 | strip: //div[@id='y-article-related'] | ||
6 | strip: //div[@id='ypf-article-related'] | ||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')] | ||
11 | |||
12 | test_url: http://finance.yahoo.com/news/canadian-orebodies-gives-notice-exercise-130000032.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt deleted file mode 100755 index 43aef750..00000000 --- a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | date: //div[@class='notes']/a | ||
2 | body: //div[@id='content'] | ||
3 | |||
4 | strip_id_or_class: tags | ||
5 | strip_id_or_class: permalink | ||
6 | strip_id_or_class: notes | ||
7 | strip_id_or_class: post_nav | ||
8 | strip: //div[@id='content']//h2 | ||
9 | strip_id_or_class: right_column | ||
10 | test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/firstthings.com.txt b/inc/3rdparty/site_config/standard/firstthings.com.txt deleted file mode 100755 index ce972bac..00000000 --- a/inc/3rdparty/site_config/standard/firstthings.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='articleTitle'] | ||
2 | author: //div[@class='articleAuthor'] | ||
3 | body: //div[@class='articleContent'] | ||
4 | prune: no | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fivechapters.com.txt b/inc/3rdparty/site_config/standard/fivechapters.com.txt deleted file mode 100755 index 9614d2f6..00000000 --- a/inc/3rdparty/site_config/standard/fivechapters.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | test_url: http://www.fivechapters.com/2014/the-saddest-writer-in-america-part-two/ | ||
diff --git a/inc/3rdparty/site_config/standard/fivefilters.org.txt b/inc/3rdparty/site_config/standard/fivefilters.org.txt deleted file mode 100755 index f37f02b9..00000000 --- a/inc/3rdparty/site_config/standard/fivefilters.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //section[contains(@class, 'container')] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://fivefilters.org/kindle-it/ | ||
diff --git a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt deleted file mode 100755 index d0a0a772..00000000 --- a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: substring-after(//title, 'Right:') | ||
2 | body: //div[@class = 'post-body'] | ||
3 | author: substring-after(//*[@class='post-author'], 'by') | ||
4 | date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a) | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt deleted file mode 100755 index 2053f801..00000000 --- a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_id_or_class: linenos | ||
2 | test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fm4.orf.at.txt b/inc/3rdparty/site_config/standard/fm4.orf.at.txt deleted file mode 100755 index 5db3e58c..00000000 --- a/inc/3rdparty/site_config/standard/fm4.orf.at.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | author: //div[@class='authorDescription']/h2 | ||
2 | body: //div[@id='story'] | ||
3 | date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-') | ||
4 | title: //h1[@class='detail'] | ||
5 | strip: //div[@class='fact'] | ||
6 | |||
7 | test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt deleted file mode 100755 index e404ccb8..00000000 --- a/inc/3rdparty/site_config/standard/fnal.gov.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: normalize(//h1) | ||
2 | |||
3 | author: //td/p[position()=last()]/em | ||
4 | |||
5 | # I swear, this is really the best way to do this | ||
6 | date: normalize(//td[contains(@style, "color: #ffffff")]) | ||
7 | |||
8 | # my god, it's full of tables | ||
9 | body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td | ||
10 | strip: //h1 | ||
11 | |||
12 | # the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output. | ||
13 | strip: //p[position()=last()]/em | ||
14 | strip: //p[position()=last()]/child::text() | ||
15 | test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/focus.de.txt b/inc/3rdparty/site_config/standard/focus.de.txt deleted file mode 100755 index 6da3687e..00000000 --- a/inc/3rdparty/site_config/standard/focus.de.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] | ||
4 | |||
5 | date: //div[@class='articleHead']/span[@class='created'] | ||
6 | |||
7 | body: //div[@id='article'] | ||
8 | |||
9 | strip: //span[@class='markerText'] | ||
10 | strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] | ||
11 | strip: //div[@class='sidebar'] | ||
12 | strip: //div[@class='starbar'] | ||
13 | strip: //div[@class='actions clearfix'] | ||
14 | strip: //div[@id='commentForm'] | ||
15 | strip: //div[@id='commentSent'] | ||
16 | strip: //div[@id='comments'] | ||
17 | strip: //div[@class='similarityBlock'] | ||
18 | |||
19 | test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/folklore.org.txt b/inc/3rdparty/site_config/standard/folklore.org.txt deleted file mode 100755 index ed23a0b6..00000000 --- a/inc/3rdparty/site_config/standard/folklore.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2] | ||
2 | date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2] | ||
3 | body: //div[@class='main'] | ||
4 | test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/food.com.txt b/inc/3rdparty/site_config/standard/food.com.txt deleted file mode 100755 index a70da766..00000000 --- a/inc/3rdparty/site_config/standard/food.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@id='print-area'] | ||
2 | title: //h1[contains(@class, 'section-title')] | ||
3 | single_page_link: //a[@id='prntrec'] | ||
4 | strip_image_src: food-logo-small | ||
5 | strip_id_or_class: timer | ||
6 | strip_id_or_class: photo-sm | ||
7 | strip_id_or_class: page-header | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.food.com/recipe/couldnt-be-easier-bbq-pork-tenderloin-crock-pot-317152 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fool.com.txt b/inc/3rdparty/site_config/standard/fool.com.txt deleted file mode 100755 index 89cb8b9a..00000000 --- a/inc/3rdparty/site_config/standard/fool.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class='entry-content'] | ||
2 | date: //meta[@name="date"]/@content | ||
3 | author: //meta[@name="author"]/@content | ||
4 | |||
5 | strip_id_or_class: ecapShell | ||
6 | strip_id_or_class: noindent | ||
7 | strip_id_or_class: targetedPromotion | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/forbes.com.txt b/inc/3rdparty/site_config/standard/forbes.com.txt deleted file mode 100755 index 9e1d04c1..00000000 --- a/inc/3rdparty/site_config/standard/forbes.com.txt +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | title: //hgroup//h1 | ||
2 | title: //span[@class='mainarttitle'] | ||
3 | |||
4 | body: //div[@id='leftRail']//div[contains(@class, 'body')] | ||
5 | |||
6 | author: //meta[@name="author"]/@content | ||
7 | author: //span[@class='mainartauthor'] | ||
8 | |||
9 | date: substring-before(//hgroup//h6, '@') | ||
10 | date: //span[@class='mainartdate'] | ||
11 | |||
12 | prune: no | ||
13 | strip: //aside | ||
14 | strip_id_or_class: sticky_sharing | ||
15 | strip_id_or_class: pagination | ||
16 | strip_id_or_class: controlsbox | ||
17 | strip_id_or_class: storyboxes | ||
18 | strip_id_or_class: sponsoredlinks | ||
19 | strip_id_or_class: nextpage | ||
20 | strip_id_or_class: contextuallinks | ||
21 | strip_id_or_class: article_actions | ||
22 | strip_id_or_class: engagement_block | ||
23 | |||
24 | single_page_link: //a[contains(@href, '/print/')] | ||
25 | |||
26 | test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html | ||
27 | test_url: http://www.forbes.com/sites/bruceupbin/2012/09/11/the-iphone-5-winners-and-losers/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt deleted file mode 100755 index cf8b742f..00000000 --- a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | # TIDY | ||
2 | #tidy: no | ||
3 | # PRUNE | ||
4 | #prune: no | ||
5 | |||
6 | # SINGLE PAGE | ||
7 | single_page_link: //div[@class='showlinks']/a | ||
8 | |||
9 | # TITLE | ||
10 | title: //h1[@class="title"] | ||
11 | |||
12 | # AUTHOR | ||
13 | author: //div[contains(@class,"field-field-article-display-authors")]/div/div/a/text() | ||
14 | |||
15 | # DATE | ||
16 | date: //div[contains(@class,"field-field-article-issue")]/div/div/a/text() | //span[@class="date-display-single"] | ||
17 | |||
18 | # BODY | ||
19 | body: //div[contains(@class,"content-resize")] | ||
20 | |||
21 | # Remove clutter | ||
22 | strip: //div[@class="article-sidebar"] | ||
23 | strip: //div[@class="showlinks"] | ||
24 | strip: //div[contains(@class,"premium-box")] | ||
25 | strip: //div[contains(@class,"premium-box")] | ||
26 | strip: //table[contains(@border,"2")] | ||
27 | |||
28 | # Fix picture captions | ||
29 | wrap_in(small): //p/img/following-sibling::em | ||
30 | wrap_in(small): //p[img]/text() | ||
31 | |||
32 | # Fix sub-headlines | ||
33 | wrap_in(h3): //div[contains(@class,"field-field-article-subtitle")]/div/div/text() | ||
34 | test_url: http://www.foreignaffairs.com/articles/138810/pierre-n-leval/the-long-arm-of-international-law \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt deleted file mode 100755 index 853a5b7b..00000000 --- a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //div[@class='translateHead']//h1 | //div[@id='art-mast']//h1 | ||
2 | author: substring-after(//span[@id='by-line'], 'BY ') | ||
3 | date: //span[@id='pub-date'] | ||
4 | body: (//article//img[contains(@class, 'main_photo')])[1] | (//article//div[contains(@class, 'full_post_content')])[1] | ||
5 | #body: //div[@id='art-mast']/h2 | //div[@class='translateBody'] | //div[@id='art-body'] | ||
6 | #Strip inside article content | ||
7 | strip: //div[@id='share-box'] | ||
8 | strip: //div[@id='special-box'] | ||
9 | |||
10 | strip_id_or_class: side_panel | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')] | ||
15 | single_page_link: //a[text()='SINGLE PAGE'] | ||
16 | |||
17 | test_url: http://www.foreignpolicy.com/articles/2014/07/22/the_end_game_in_gaza_netanyahu_hamas | ||
18 | test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me | ||
19 | test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt deleted file mode 100755 index c1bd2bac..00000000 --- a/inc/3rdparty/site_config/standard/forsvaret.no.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@class="articleHeader"]/h1 | ||
2 | author: //p[@class="byline"] | ||
3 | date: //p[contains(@class,"publishedDate")]/span | ||
4 | # remove the right menu | ||
5 | strip: //div[contains(@class,"aside")] | ||
6 | # remove some SharePoint webpart label junk | ||
7 | strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] | ||
8 | strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] | ||
9 | test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/foxnews.com.txt b/inc/3rdparty/site_config/standard/foxnews.com.txt deleted file mode 100755 index e19c77db..00000000 --- a/inc/3rdparty/site_config/standard/foxnews.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | prune: no | ||
2 | |||
3 | author: //meta[@name="dc.publisher"]/@content | ||
4 | date: //meta[@name="dc.date"]/@content | ||
5 | strip: //p[contains(@class, 'contributor vcard')] | ||
6 | replace_string(<ul><li><div class="photo">): <div class="photo"> | ||
7 | strip: //p[a[contains(., 'Click here to read more on this story ')]] | ||
8 | |||
9 | test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/freelancer.com.txt b/inc/3rdparty/site_config/standard/freelancer.com.txt deleted file mode 100755 index 78d37729..00000000 --- a/inc/3rdparty/site_config/standard/freelancer.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id="projectDetailsContent"]//td | ||
2 | |||
3 | test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/freytag-film.com.txt b/inc/3rdparty/site_config/standard/freytag-film.com.txt deleted file mode 100755 index c83f8303..00000000 --- a/inc/3rdparty/site_config/standard/freytag-film.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class = 'instapaperbody'] | ||
2 | convert_double_br_tags: no | ||
3 | date: //div[@class='instadate'] | ||
4 | title: //h2[@class = 'instatitle'] | ||
5 | test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fria.nu.txt b/inc/3rdparty/site_config/standard/fria.nu.txt deleted file mode 100755 index 9d8eff97..00000000 --- a/inc/3rdparty/site_config/standard/fria.nu.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.fria.nu/artikel/112079 | ||
8 | test_url: http://www.fria.nu/taxonomy/term/1928/all/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/friatidningen.se.txt b/inc/3rdparty/site_config/standard/friatidningen.se.txt deleted file mode 100755 index 1e4abc5a..00000000 --- a/inc/3rdparty/site_config/standard/friatidningen.se.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.friatidningen.se/artikel/112074 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/friendskorner.com.txt b/inc/3rdparty/site_config/standard/friendskorner.com.txt deleted file mode 100755 index b067d88a..00000000 --- a/inc/3rdparty/site_config/standard/friendskorner.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ft.com.txt b/inc/3rdparty/site_config/standard/ft.com.txt deleted file mode 100755 index e66b9603..00000000 --- a/inc/3rdparty/site_config/standard/ft.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, 'ft-story-body')] | ||
2 | |||
3 | author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ') | ||
4 | date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|') | ||
5 | test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ftchinese.com.txt b/inc/3rdparty/site_config/standard/ftchinese.com.txt deleted file mode 100755 index 5c94d9b0..00000000 --- a/inc/3rdparty/site_config/standard/ftchinese.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | # Modified to define the single_page_link | ||
2 | # This filter is tested on: | ||
3 | # http://www.ftchinese.com/story/001047373 | ||
4 | # http://www.ftchinese.com/story/001047631 | ||
5 | # http://www.ftchinese.com/story/001047622/?print=y | ||
6 | # http://www.ftchinese.com/story/001049052 | ||
7 | # http://www.ftchinese.com/story/001049088 | ||
8 | |||
9 | title:substring-before(//title, '-') | ||
10 | author: //div[@class='byline']/a | ||
11 | date: //a[@class='storytime'] | ||
12 | #Set date in print view | ||
13 | #date: //div[@class='byline']/a/following-sibling::a | ||
14 | body: //div[@id="bodytext"] | ||
15 | strip://div[@class='pagination'] | ||
16 | single_page_link://div[@class='pagination']/a[.='全文'] | ||
17 | #next_page_link: //div[@class='pagination']//a[.='下一页'] | ||
18 | test_url: http://www.ftchinese.com/story/001049088 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ftd.de.txt b/inc/3rdparty/site_config/standard/ftd.de.txt deleted file mode 100755 index 7d76af00..00000000 --- a/inc/3rdparty/site_config/standard/ftd.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft'] | ||
2 | single_page_link: //a[@class='icon print'] | ||
3 | |||
4 | test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html | ||
5 | test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fubiz.net.txt b/inc/3rdparty/site_config/standard/fubiz.net.txt deleted file mode 100755 index 0dc30475..00000000 --- a/inc/3rdparty/site_config/standard/fubiz.net.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class = 'entry'] | ||
2 | |||
3 | test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/futurezone.at.txt b/inc/3rdparty/site_config/standard/futurezone.at.txt deleted file mode 100755 index 808c1f1b..00000000 --- a/inc/3rdparty/site_config/standard/futurezone.at.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | date: //span[@class='date'] | ||
2 | strip: //div[@class='postsidebar'] | ||
3 | body: //div[@class='singlepost'] | ||
4 | title: //div[@class='singlepost']/h1 | ||
5 | move_into(//div[@class='singlepost']): //div[@class='info'] | ||
6 | strip: //div[@class='gallery'] | ||
7 | strip: //div[@class='biggallery'] | ||
8 | strip: //ul[@class='social'] | ||
9 | strip: //ul[@class='social_mail'] | ||
10 | |||
11 | test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gamasutra.com.txt b/inc/3rdparty/site_config/standard/gamasutra.com.txt deleted file mode 100755 index 7c808cfd..00000000 --- a/inc/3rdparty/site_config/standard/gamasutra.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # default view title | ||
2 | title: //span[@class='newsTitle'] | ||
3 | # print view title | ||
4 | title: //h3[@class='title'] | ||
5 | |||
6 | # default view author | ||
7 | author: //span[@class='newsAuth']/a | ||
8 | author: substring-after(//span[@class='newsAuth'], 'by ') | ||
9 | |||
10 | # default view date | ||
11 | date: //td[@class='newsDate'] | ||
12 | |||
13 | # default view body | ||
14 | body: //td[@class='featureText'] | ||
15 | body: //td[@class='newsText'] | ||
16 | |||
17 | strip: //h3[@class='title'] | ||
18 | |||
19 | single_page_link: //a[contains(@href, '?print=1')] | ||
20 | test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt deleted file mode 100755 index 73f8342f..00000000 --- a/inc/3rdparty/site_config/standard/gameblog.fr.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | strip_id_or_class: noprint | ||
7 | strip: //div[@id='gbNewsTextContent']/following-sibling::* | ||
8 | |||
9 | test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video | ||
10 | test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gamechurch.com.txt b/inc/3rdparty/site_config/standard/gamechurch.com.txt deleted file mode 100755 index c9eea5f8..00000000 --- a/inc/3rdparty/site_config/standard/gamechurch.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | |||
3 | date: substring-before(substring-after(//div[@class='comment-bubble']/.., 'Posted'), 'by') | ||
4 | |||
5 | body: //div[@class='the-content'] | ||
6 | |||
7 | strip: //div[@class='article-image responsive'] | ||
8 | |||
9 | strip_id_or_class: 'pullquote' | ||
10 | test_url: http://gamechurch.com/virtual-gun-control-the-best-amendment/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gamer.no.txt b/inc/3rdparty/site_config/standard/gamer.no.txt deleted file mode 100755 index e76a59d9..00000000 --- a/inc/3rdparty/site_config/standard/gamer.no.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class='pageContent description'] | ||
2 | date: //div[@class='authorsAndDateTime']/span[@title] | ||
3 | single_page_link: //div[@class='pages']/a[last()-1] | ||
4 | |||
5 | # fix images and captions | ||
6 | wrap_in(figure): //div[contains(concat(' ', @class, ' '), ' image')] | ||
7 | wrap_in(figcaption): //div[contains(concat(' ', @class, ' '), ' image')]/div[@class='text']/text() | ||
8 | |||
9 | # get rid of videos | ||
10 | strip_id_or_class: 'video full' | ||
11 | test_url: http://www.gamer.no/artikler/142455/slik-blei-ambisiose-dragons-dogma-skapt/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gamereactor.no.txt b/inc/3rdparty/site_config/standard/gamereactor.no.txt deleted file mode 100755 index 6f7c1b9b..00000000 --- a/inc/3rdparty/site_config/standard/gamereactor.no.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@id='content']/div/h1 | ||
2 | |||
3 | author: //a[@itemprop='reviewer'] | ||
4 | |||
5 | date: //time[@itemprop='dtreviewed']/@datetime | ||
6 | |||
7 | body: //div[@id='breadtext'] | ||
8 | |||
9 | # fix for NOT magically removing anchors with text identical to title | ||
10 | dissolve: //a[text()=//div[@id='content']/div/h1/text()] | ||
11 | test_url: http://www.gamereactor.no/previews/177481/The+Evil+Within/?sid=38b5bd30f56f1b7214de4ff5bed4b76f \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/garythink.com.txt b/inc/3rdparty/site_config/standard/garythink.com.txt deleted file mode 100755 index 327ac55b..00000000 --- a/inc/3rdparty/site_config/standard/garythink.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gasteroprod.com.txt b/inc/3rdparty/site_config/standard/gasteroprod.com.txt deleted file mode 100755 index 8eda0c36..00000000 --- a/inc/3rdparty/site_config/standard/gasteroprod.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | # These should work, but don't. They were given by the Firefox XPather extension | ||
2 | title: //article//header//a//h1 | ||
3 | body: //article//section | ||
4 | test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gatopardo.com.txt b/inc/3rdparty/site_config/standard/gatopardo.com.txt deleted file mode 100755 index 2ab144f5..00000000 --- a/inc/3rdparty/site_config/standard/gatopardo.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[@class='panel'] | ||
2 | strip: //div[@style='float:right'] | ||
3 | strip: //span[@class='titulosHomePublicidad'] | ||
4 | strip: //div[@id='TitTop5Der'] | ||
5 | strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png'] | ||
6 | |||
7 | prune: yes | ||
8 | test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt deleted file mode 100755 index 9bc5613a..00000000 --- a/inc/3rdparty/site_config/standard/gawker.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class="post-body"] | ||
2 | |||
3 | # Remove 'content is restricted' | ||
4 | strip: //div[@id='agegate_IDHERE'] | ||
5 | |||
6 | test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt deleted file mode 100755 index 89eb402f..00000000 --- a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: substring-after(//span[@class='storyauthor'],'Posted by') | ||
2 | date: //span[@class='storydate'] | ||
3 | test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/geenstijl.nl.txt b/inc/3rdparty/site_config/standard/geenstijl.nl.txt deleted file mode 100755 index a664b4d9..00000000 --- a/inc/3rdparty/site_config/standard/geenstijl.nl.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id = 'article'] | ||
2 | strip: //div[@id = 'klasbox'] | ||
3 | test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/getnews.jp.txt b/inc/3rdparty/site_config/standard/getnews.jp.txt deleted file mode 100755 index e28d4b8b..00000000 --- a/inc/3rdparty/site_config/standard/getnews.jp.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='post'] | ||
2 | strip: //ul[@id='bookmark_single'] | ||
3 | test_url: http://getnews.jp/archives/117312 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/giantbomb.com.txt b/inc/3rdparty/site_config/standard/giantbomb.com.txt deleted file mode 100755 index 61de51b2..00000000 --- a/inc/3rdparty/site_config/standard/giantbomb.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # 2011-11-19 - carlo@... - Initial setup. | ||
2 | |||
3 | strip_id_or_class: user-review-detail | ||
4 | strip: //h1 | ||
5 | |||
6 | body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"] | ||
7 | |||
8 | author: //span[@class="reviewer"] | //p[@class="byline"]/a/text() | ||
9 | date: //span[@class="dtreviewed"] | ||
10 | |||
11 | test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/giga.de.txt b/inc/3rdparty/site_config/standard/giga.de.txt deleted file mode 100755 index e2689eae..00000000 --- a/inc/3rdparty/site_config/standard/giga.de.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | tidy:no | ||
2 | title://h2[@class="title"] | ||
3 | # author:"Ben Miller" | ||
4 | date://div[@id="stats"]/span | ||
5 | strip_id_or_class:stats | ||
6 | strip_id_or_class:breadcrumbs | ||
7 | strip_id_or_class:gn-why-content | ||
8 | strip_id_or_class:single-social | ||
9 | strip_id_or_class:sidebar-ads | ||
10 | strip_id_or_class:sidebar-top | ||
11 | strip_id_or_class:footer | ||
12 | strip_id_or_class:post_meta | ||
13 | # strip_id_or_class: | ||
14 | # strip_id_or_class: | ||
15 | # strip_id_or_class: | ||
16 | # strip_id_or_class: | ||
17 | # strip_id_or_class: | ||
18 | # strip_id_or_class: | ||
19 | |||
20 | test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt deleted file mode 100755 index cc8fdfa0..00000000 --- a/inc/3rdparty/site_config/standard/gigaom.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | date: //meta[@name='dcterms.created']/@content | ||
2 | title: //meta[@property='og:title']/@content | ||
3 | author: //section[@class="post-meta"]//a[@rel="author"] | ||
4 | |||
5 | body: //div[starts-with(@id, 'post-content-')] | ||
6 | |||
7 | strip_id_or_class: sharedaddy | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ | ||
12 | test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gihyo.jp.txt b/inc/3rdparty/site_config/standard/gihyo.jp.txt deleted file mode 100755 index d3534b29..00000000 --- a/inc/3rdparty/site_config/standard/gihyo.jp.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] | ||
2 | |||
3 | test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt deleted file mode 100755 index 90207862..00000000 --- a/inc/3rdparty/site_config/standard/gist.github.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class="highlight"]/pre | ||
2 | |||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: https://gist.github.com/1258908 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt deleted file mode 100755 index 0de0750b..00000000 --- a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //div[@id="content"]//h2/a | ||
2 | |||
3 | test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt deleted file mode 100755 index 2eb82a6d..00000000 --- a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id="leadimage" or @class="postcontent"] | ||
2 | author: //div[@class="contentauthor"] | ||
3 | date: //div[@class="timestamp"] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt deleted file mode 100755 index e73ec9d2..00000000 --- a/inc/3rdparty/site_config/standard/gizmodo.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #body: //div[@class="post-body" or contains(@class, 'illustration top')] | ||
2 | body: //div[contains(@class, 'image-annotation-box') or contains(@class, 'post-content')] | ||
3 | #author: (//cite//span[@class="plus-icon"])[1] | ||
4 | author: //span[contains(@class, 'display-name')] | ||
5 | date: //span[@class="date"] | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science | ||
10 | test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680 | ||
11 | test_url: http://gizmodo.com/vip.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt deleted file mode 100755 index d963d684..00000000 --- a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p | ||
4 | |||
5 | test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/ | ||
6 | test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/ | ||
diff --git a/inc/3rdparty/site_config/standard/gizmologia.com.txt b/inc/3rdparty/site_config/standard/gizmologia.com.txt deleted file mode 100755 index d2c7c9f9..00000000 --- a/inc/3rdparty/site_config/standard/gizmologia.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://gizmologia.com/2011/09/amd-trinity-el-sucesor-de-llano-en-una-demostracion-muy-interesante \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmovil.com.txt b/inc/3rdparty/site_config/standard/gizmovil.com.txt deleted file mode 100755 index 5fc204b8..00000000 --- a/inc/3rdparty/site_config/standard/gizmovil.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://gizmovil.com/2011/09/hipertextual-labs-receptor-bluetooth-nokia-bh-214 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/global.txt b/inc/3rdparty/site_config/standard/global.txt deleted file mode 100755 index 71fbc934..00000000 --- a/inc/3rdparty/site_config/standard/global.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | # Look for Open Graph data - http://ogp.me | ||
2 | title: //meta[@property="og:title"]/@content | ||
3 | date: //meta[@property="article:published_time"]/@content | ||
4 | # article:author is sometimes a URL, e.g. on guardian.co.uk | ||
5 | |||
6 | # Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en | ||
7 | #strip_id_or_class: div-gpt-ad | ||
8 | |||
9 | # Strip doubleclick image ads | ||
10 | strip_image_src: doubleclick.net | ||
11 | |||
12 | # If you get chunks of Javascript code appearing in the extracted output, try uncommenting the lines below. | ||
13 | # This tries to convert script tags to hidden div elements (which Full-Text RSS removes). | ||
14 | # If you notice issues with this approach, please let us know. | ||
15 | #find_string: <script | ||
16 | #replace_string: <div style="display:none" | ||
17 | #find_string: </script> | ||
18 | #replace_string: </div> \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/globalissues.org.txt b/inc/3rdparty/site_config/standard/globalissues.org.txt deleted file mode 100755 index ee50f68f..00000000 --- a/inc/3rdparty/site_config/standard/globalissues.org.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | strip: //p[@class='top'] | ||
4 | strip: //h2[.='Where next?'] | ||
5 | strip_id_or_class: where-next | ||
6 | strip_id_or_class: social-bookmarks | ||
7 | strip_id_or_class: link-to-here | ||
8 | strip_id_or_class: options-heading | ||
9 | strip_id_or_class: page-options-content | ||
10 | strip_id_or_class: page-info-bottom | ||
11 | |||
12 | tidy: no | ||
13 | prune: no | ||
14 | |||
15 | test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt deleted file mode 100755 index fd8e70ff..00000000 --- a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | |||
3 | body: //div[@class='materia-titulo']/h2 | //*[@id="materia-letra"] | ||
4 | |||
5 | date: //abbr[@class="published"] | ||
6 | date: //abbr[@class="updated"] | ||
7 | |||
8 | author: //*[@class="author"]/strong | ||
9 | |||
10 | strip: //div[contains(@class,'foto')]/strong | ||
11 | strip: //div[contains(@class,'frase-materia')]/div[@class='autor'] | ||
12 | strip: //div[contains(@class,'saibamais')] | ||
13 | strip: //*[contains(text(),'Clique aqui e veja mais')]/ancestor::p | ||
14 | strip: //ul[@class="toolbar"] | ||
15 | |||
16 | # quotes | ||
17 | wrap_in(blockquote): //div[@id='materia-letra']//div[contains(@class,'frase-materia')]/div[@class='frase'] | ||
18 | |||
19 | prune: no | ||
20 | |||
21 | replace_string([Clique aqui e veja mais vídeos do Fluminense]): [] | ||
22 | |||
23 | test_url: http://globoesporte.globo.com/atletismo/noticia/2013/08/michael-johnson-diz-que-bolt-e-melhor-da-historia-nao-ha-duvidas.html | ||
24 | test_url: http://globoesporte.globo.com/futebol/futebol-internacional/futebol-espanhol/noticia/2013/08/barca-atropela-levante-e-neymar-passa-em-branco-em-estreia-oficial.html | ||
25 | test_url: http://globoesporte.globo.com/futebol/times/fluminense/noticia/2013/08/poupado-no-sabado-felipe-se-diz-pronto-para-ser-titular-contra-o-goias.html | ||
diff --git a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt deleted file mode 100755 index 16487955..00000000 --- a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //article[@id='material']/header/h1 | ||
2 | author: //article[@id='material']/header/div[2]/p | ||
3 | date: //article[@id='material']/header/p/time[1] | ||
4 | body: //section[@id='tresc'] | ||
5 | next_page_link: .//section[@id='tresc']/div[@class='stronicowanie']/a[@rel='next'] | ||
6 | strip://div[@class='podobneSonda'] | ||
7 | |||
8 | test_url: http://www.gloswielkopolski.pl/artykul/803547,abc-telemarketingu-praca-ktora-zwalnia-z-myslenia,id,t.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/goal.com.txt b/inc/3rdparty/site_config/standard/goal.com.txt deleted file mode 100755 index e25e9a00..00000000 --- a/inc/3rdparty/site_config/standard/goal.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //div[@id='article_headline']//h1 | ||
2 | date: //div[contains(@class, 'articleDate')]//h4 | ||
3 | body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content'] | ||
4 | |||
5 | strip_id_or_class: relatedLinksBox | ||
6 | strip_id_or_class: betting-widget | ||
7 | strip_image_src: install_flash.gif | ||
8 | |||
9 | strip: //table[contains(@style, 'float: right; width: 285px;')] | ||
10 | strip: //div[@class='caption'] | ||
11 | |||
12 | tidy: no | ||
13 | prune: no | ||
14 | |||
15 | test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and- | ||
16 | test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt deleted file mode 100755 index c64860c0..00000000 --- a/inc/3rdparty/site_config/standard/golem.de.txt +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | # Rewrite of original template which fetched the printer-version without pictures | ||
3 | |||
4 | tidy: no | ||
5 | prune: no | ||
6 | |||
7 | # Set full title | ||
8 | title: //h1 | ||
9 | |||
10 | date: //time | ||
11 | |||
12 | # Content is here | ||
13 | body: //article | ||
14 | |||
15 | # Fetch full multipage articles | ||
16 | next_page_link: //a[@id='atoc_next'] | ||
17 | |||
18 | # Remove tracking and ads | ||
19 | strip_id_or_class: iqadtile4 | ||
20 | |||
21 | # General Cleanup | ||
22 | strip_id_or_class: list-jtoc | ||
23 | strip_id_or_class: table-jtoc | ||
24 | strip_id_or_class: implied | ||
25 | strip_id_or_class: social- | ||
26 | strip_id_or_class: comments | ||
27 | strip_id_or_class: footer | ||
28 | |||
29 | # Tidy up galleries (could still be improved, though) | ||
30 | strip: //img[@src=''] | ||
31 | |||
32 | # Try yourself | ||
33 | test_url: http://www.golem.de/news/intel-core-i7-5960x-im-test-die-pc-revolution-beginnt-mit-octacore-und-ddr4-1408-108893.html | ||
34 | test_url: http://www.golem.de/news/test-infamous-first-light-neonbunter-actionspass-1408-108914.html | ||
diff --git a/inc/3rdparty/site_config/standard/good.is.txt b/inc/3rdparty/site_config/standard/good.is.txt deleted file mode 100755 index 94159fbf..00000000 --- a/inc/3rdparty/site_config/standard/good.is.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //div[@class="title"]/div/h1 | ||
2 | body: //div[@class="body"] | ||
3 | date: //li[@class="date-time"] | ||
4 | test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/goodfil.ms.txt b/inc/3rdparty/site_config/standard/goodfil.ms.txt deleted file mode 100755 index f8bbbc6a..00000000 --- a/inc/3rdparty/site_config/standard/goodfil.ms.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_id_or_class: gutter | ||
2 | test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt deleted file mode 100755 index e2d2d0b2..00000000 --- a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | date: //meta[@name='og:article:published_time']/@value | ||
2 | |||
3 | body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] | ||
4 | |||
5 | strip_id_or_class: itemImageGallery | ||
6 | |||
7 | # remove extras at end of post content | ||
8 | find_string: <div style="margin:5px 0 10px;"> | ||
9 | replace_string: </div></body></html><!-- | ||
10 | |||
11 | prune: no | ||
12 | |||
13 | test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous | ||
14 | test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt deleted file mode 100755 index c90aed0b..00000000 --- a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.goteborgsfria.se/artikel/112079 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gothamist.com.txt b/inc/3rdparty/site_config/standard/gothamist.com.txt deleted file mode 100755 index 36453878..00000000 --- a/inc/3rdparty/site_config/standard/gothamist.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='entry-header'] | ||
2 | author: //span[@class='vcard author'] | ||
3 | date: //abbr[@class='published'] | ||
4 | #move_into(//div[@class='entry-body']): //img[@id='photo_1'] | ||
5 | body: //div[@class='entry-body'] | ||
6 | strip: //div[@class='galleryEaseThumbs'] | ||
7 | test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gotomanager.com.txt b/inc/3rdparty/site_config/standard/gotomanager.com.txt deleted file mode 100755 index f8af7324..00000000 --- a/inc/3rdparty/site_config/standard/gotomanager.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | title: //span[@id="showTitle"] | ||
2 | author: //span[@id="showAuthor"] | ||
3 | date: //span[@id="showRefDate"] | ||
4 | |||
5 | strip: //span[@class="black_bold"] | ||
6 | strip: //div[@id="sectionName"] | ||
7 | strip: //div[@id="storyHeader"] | ||
8 | |||
9 | body: //div[@id="newsBodyText"] | ||
10 | |||
11 | strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif" | ||
12 | strip_image_src: "http://www.gotomanager.com/images/separator.gif" | ||
13 | strip_image_src: "http://www.gotomanager.com/images/spaces.gif" | ||
14 | |||
15 | convert_double_br_tags: yes | ||
16 | tidy: yes | ||
17 | |||
18 | strip: //div[@id="smallLeadImage"] | ||
19 | strip: //div[@id="truehitsSurvey"] | ||
20 | strip: //table[@id="relatedInfoTable"] | ||
21 | test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gov.ky.txt b/inc/3rdparty/site_config/standard/gov.ky.txt deleted file mode 100755 index 294ece3a..00000000 --- a/inc/3rdparty/site_config/standard/gov.ky.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | strip: //body//title | ||
2 | |||
3 | test_url: http://www.gov.ky/pls/portal/PORTAL.wwv_media.show?p_id=7593947&p_settingssetid=1&p_settingssiteid=0&p_siteid=2425&p_type=basetext&p_textid=7593948 | ||
4 | test_url: http://www.rcips.ky/pls/portal/wlacomp.wlafeed.show_cignewsfeed_agency?p_sitecode=POL&p_agency=Police \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gp.se.txt b/inc/3rdparty/site_config/standard/gp.se.txt deleted file mode 100755 index 158ae4ed..00000000 --- a/inc/3rdparty/site_config/standard/gp.se.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@id='articleContainer'] | ||
2 | author: //div[@id='articleContent']//div[contains(@class, 'byline')]//span[contains(@class, 'name fn')] | ||
3 | strip_id_or_class: toolbar | ||
4 | strip_id_or_class: ADad | ||
5 | strip_id_or_class: articleSerieWrapper | ||
6 | strip_id_or_class: articleFloatContainer | ||
7 | strip: //div[contains(@class, 'byline')]//img | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.gp.se/nyheter/bohuslan/1.2045564-styckade-mannen-hade-mordat-hustrun | ||
11 | test_url: http://www.gp.se/1.16560 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gq.com.txt b/inc/3rdparty/site_config/standard/gq.com.txt deleted file mode 100755 index 8ad8a14e..00000000 --- a/inc/3rdparty/site_config/standard/gq.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a | ||
2 | strip_id_or_class: utility | ||
3 | strip_id_or_class: keywords | ||
4 | strip_id_or_class: pagination | ||
5 | strip_id_or_class: position2_content | ||
6 | body: //div[@class='article'] | ||
7 | title: //h1[@class='content-headline'] | ||
8 | author: //span[@class='contributor']//a | ||
9 | test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/grantland.com.txt b/inc/3rdparty/site_config/standard/grantland.com.txt deleted file mode 100755 index b8d419f4..00000000 --- a/inc/3rdparty/site_config/standard/grantland.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # this is fragile with footnotes -- leave it for now | ||
2 | |||
3 | #tidy: no | ||
4 | #prune: no | ||
5 | #move_into(//article): //aside[@id='footnotes'] | ||
6 | author: //cite/a | ||
7 | date: //time | ||
8 | |||
9 | strip: //a[text()='Grantland'] | ||
10 | strip_id_or_class: ad-wrapper | ||
11 | strip_id_or_class: fb-connect-link | ||
12 | strip_id_or_class: fb-status | ||
13 | strip: //li[@class='print'] | ||
14 | strip: //cite | ||
15 | strip: //a[contains(text(), '[+]')] | ||
16 | strip: //a[@id='jump-nav-link'] | ||
17 | strip: //h1[text()='Share This'] | ||
18 | strip: //h1[text()='Top Stories'] | ||
19 | strip: //div[@id="update-text-size"] | ||
20 | test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt deleted file mode 100755 index 31a41075..00000000 --- a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@class="blogpost"]/h2 | ||
2 | author: //div[@class="blogpost"]/p[@class="byline"]/a | ||
3 | date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"] | ||
4 | body: //div[@class="blogpost"] | ||
5 | strip_id_or_class: flag | ||
6 | strip_id_or_class: byline | ||
7 | strip_id_or_class: post_footer | ||
8 | strip_id_or_class: related_posts | ||
9 | strip_id_or_class: post_author_bios | ||
10 | strip: //h2 | ||
11 | test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/groups.drupal.org.txt b/inc/3rdparty/site_config/standard/groups.drupal.org.txt deleted file mode 100755 index 0fe30ef5..00000000 --- a/inc/3rdparty/site_config/standard/groups.drupal.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title://h1 | ||
2 | author://span[@class="submitted"]/a | ||
3 | date:substring-after(//span[@class="submitted"],'on ') | ||
4 | body://div[@class="content"] | ||
5 | test_url: http://groups.drupal.org/node/36816 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/guardian.co.uk.txt b/inc/3rdparty/site_config/standard/guardian.co.uk.txt deleted file mode 100644 index 71d84306..00000000 --- a/inc/3rdparty/site_config/standard/guardian.co.uk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@id='main-article-info']//h1 | ||
2 | body: //div[@id='article-wrapper'] | ||
3 | date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] | ||
4 | author: //li[@class='byline'] | ||
5 | prune: no | ||
6 | tidy: no | ||
7 | test_url: http://www.guardian.co.uk/business/2011/oct/06/quantitative-easing-75bn-bank-of-england \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gulfnews.com.txt b/inc/3rdparty/site_config/standard/gulfnews.com.txt deleted file mode 100755 index 97b620de..00000000 --- a/inc/3rdparty/site_config/standard/gulfnews.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article'] | ||
2 | strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/guokr.com.txt b/inc/3rdparty/site_config/standard/guokr.com.txt deleted file mode 100755 index f8327bea..00000000 --- a/inc/3rdparty/site_config/standard/guokr.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | # To administrator: | ||
2 | # Please change the hostname to "www.guokr.com/article/*" | ||
3 | # Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com | ||
4 | |||
5 | # This filter is tested on: | ||
6 | # http://www.guokr.com/article/274325/ | ||
7 | # http://www.guokr.com/article/275013/ | ||
8 | |||
9 | title://h1 | ||
10 | author://div[contains(@class, 'content-th-info')]/a | ||
11 | date://div[contains(@class, 'content-th-info')]/span | ||
12 | body://div[contains(@class, 'Content')] | ||
13 | |||
14 | strip://div[contains(@class, 'bottom-i')] | ||
15 | strip://div[contains(@class, 'copyright')] | ||
16 | strip://div[contains(@class, 'fr')] | ||
17 | strip://div[contains(@class, 'content-th-info')] | ||
18 | strip://h1[contains(@id, 'articleTitle')] | ||
19 | strip://div[contains(@class, 'side')] | ||
20 | strip://div[contains(@class, 'top-wp')] | ||
21 | test_url: http://www.guokr.com/article/275013/ | ||
22 | test_url: http://www.guokr.com/article/338387/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/haberler.com.txt b/inc/3rdparty/site_config/standard/haberler.com.txt deleted file mode 100755 index 1bb2bc7d..00000000 --- a/inc/3rdparty/site_config/standard/haberler.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@id="habermetni"]/h1[@id="haber_baslik"] | ||
2 | body: //div[@id="habermetni"]/p | ||
3 | strip: //img[@class='newsDetailLeft'] | ||
4 | strip_image_src: /haber-resimleri/ | ||
5 | test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/habrahabr.ru.txt b/inc/3rdparty/site_config/standard/habrahabr.ru.txt deleted file mode 100755 index 67538359..00000000 --- a/inc/3rdparty/site_config/standard/habrahabr.ru.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | title: //span[@class="post_title"] | ||
2 | author: //div[@class="author"] | ||
3 | date: //div[@class="published"] | ||
4 | |||
5 | body: //div[@class='content html_format'] | //div[@id='comments'] | ||
6 | |||
7 | strip: //a[@class="link_to_comment"] | ||
8 | strip: //div[@class="show_tree"] | ||
9 | strip: //a[@class="to_parent"] | ||
10 | |||
11 | |||
12 | replace_string(class="reply_comments"): style="padding-left: 20px" | ||
13 | replace_string(class="voting "): style="float: right" | ||
14 | replace_string(src="//habrastorage.org/getpro/habr/avatars/): style="width:24px; height:24px;" class="123" src="//habrastorage.org/getpro/habr/avatars/ | ||
15 | replace_string(class="info "): style="padding-top:5px;font-size:0.85em;line-height:24px;" | ||
16 | |||
17 | |||
18 | prune: no | ||
19 | tidy: no | ||
20 | |||
21 | test_url: http://habrahabr.ru/post/229883/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hackmake.org.txt b/inc/3rdparty/site_config/standard/hackmake.org.txt deleted file mode 100755 index 98140117..00000000 --- a/inc/3rdparty/site_config/standard/hackmake.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | date: //article//time[@pubdate] | ||
2 | body: //article/div[@id="post-wide"] | ||
3 | title: //article/header/h2 | ||
4 | strip: /div[@id="comment"] | ||
5 | strip: //footer | ||
6 | author: substring-after(//footer/p[@class='byline'] , 'By') | ||
7 | test_url: http://hackmake.org/2012/12/21/mindfulness-of-concentration \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/halo.bungie.org.txt b/inc/3rdparty/site_config/standard/halo.bungie.org.txt deleted file mode 100755 index 1802efea..00000000 --- a/inc/3rdparty/site_config/standard/halo.bungie.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title:substring-before(id("maincontent")/table, 'Posted') | ||
2 | body:id("maincontent")/p | ||
3 | # TODO: improve line-break conversion | ||
4 | |||
5 | test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt deleted file mode 100755 index 33f7e726..00000000 --- a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # Remove right column | ||
2 | strip: //*[(@class = 'right_col')] | ||
3 | |||
4 | # Remove comments etc. | ||
5 | strip: //*[(@class = 'category')] | ||
6 | strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3 | ||
7 | test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt deleted file mode 100755 index 7d067aa6..00000000 --- a/inc/3rdparty/site_config/standard/handelsblatt.com.txt +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | #Single Page | ||
2 | single_page_link: //li[contains(@class,"hcf-print")]/a | ||
3 | |||
4 | # Title hcf-headline | ||
5 | title: //span[@class='hcf-headline'] | ||
6 | |||
7 | # Authors | ||
8 | author: //div[@class="hcf-author"]/a/text() | ||
9 | author: substring-after(//div[@class='hcf-author'], 'von ') | ||
10 | |||
11 | # Date | ||
12 | date: //div[@class='hcf-article-date'] | ||
13 | |||
14 | # Body | ||
15 | body: //div[@class='article'] | ||
16 | |||
17 | # General removals | ||
18 | strip: //div[contains(@class,"hcf-smartbox")] | ||
19 | strip: //div[contains(@class,"hcf-stopper")] | ||
20 | strip: //div[contains(@class,"hcf-img-controls")] | ||
21 | strip: //span[@class='hcf-location-mark'] | ||
22 | strip: //span[@class='hcf-copyright'] | ||
23 | strip: //div[@class='hcf-copyright'] | ||
24 | strip: //div[@class='hcf-origin'] | ||
25 | |||
26 | |||
27 | |||
28 | |||
29 | # Fix picture captions | ||
30 | wrap_in(small): //div[@class="hcf-caption"] | ||
31 | test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hanselman.com.txt b/inc/3rdparty/site_config/standard/hanselman.com.txt deleted file mode 100755 index 1dca632f..00000000 --- a/inc/3rdparty/site_config/standard/hanselman.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | date: //span[@class="item-date"] | ||
2 | body: //div[@class="item-content"] | ||
3 | strip_comments: no | ||
4 | test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hardware.fr.txt b/inc/3rdparty/site_config/standard/hardware.fr.txt deleted file mode 100755 index e4f1f6bc..00000000 --- a/inc/3rdparty/site_config/standard/hardware.fr.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //a[@class='a_aut'] | ||
3 | body: //div[@class='content_dossier'] | ||
4 | strip: //div[@id='pagination'] | ||
5 | next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href | ||
6 | test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hardware.no.txt b/inc/3rdparty/site_config/standard/hardware.no.txt deleted file mode 100755 index cbbcf84e..00000000 --- a/inc/3rdparty/site_config/standard/hardware.no.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | title: //h2[@itemprop='alternativeHeadline'] | ||
3 | title: //h1[@itemprop='headline'] | ||
4 | author: //span[@itemprop='name'] | ||
5 | date: //time[@itemprop='datePublished'] | ||
6 | body: //div[@itemprop='reviewBody'] | ||
7 | |||
8 | wrap_in(blockquote): //div[@class='factBox'] | ||
9 | |||
10 | next_page_link: //a[@rel='next'] | ||
11 | |||
12 | strip_id_or_class: 'product-box' | ||
13 | strip: //a[@rel='next'] | ||
14 | strip: //a[text()='Del på Facebook'] | ||
15 | strip: //a[text()='Del på Twitter'] | ||
16 | test_url: http://www.hardware.no/artikler/asus-vg248qe/132792 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hbr.org.txt b/inc/3rdparty/site_config/standard/hbr.org.txt deleted file mode 100755 index c2f292e1..00000000 --- a/inc/3rdparty/site_config/standard/hbr.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@id='article-title'] | ||
2 | author: //div[@id='articleAuthors'] | ||
3 | body: //div[@id='article'] | ||
4 | strip: //div[@class='module wide'] | ||
5 | #single_page_link: //a[@class='social-print'] | ||
6 | test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ | ||
7 | test_url: http://hbr.org/2013/03/big-bang-disruption/ar/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt deleted file mode 100755 index a3146771..00000000 --- a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title://div[@class='content']/h3[1] | ||
2 | body://div[@class='content'] | ||
3 | |||
4 | # Article nav | ||
5 | strip://div[@class='content']/p[1] | ||
6 | |||
7 | # Comments and trackbacks | ||
8 | strip://h2/following-sibling::p | ||
9 | strip://h2 | ||
10 | |||
11 | # Posted on | ||
12 | strip://b/p | ||
13 | strip://div[@class='content']/p[@class='posted'] | ||
14 | test_url: http://headrush.typepad.com/creating_passionate_users/2005/05/the_case_for_ea.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/healthland.time.com.txt b/inc/3rdparty/site_config/standard/healthland.time.com.txt deleted file mode 100644 index 204d8da0..00000000 --- a/inc/3rdparty/site_config/standard/healthland.time.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | date: //span[@class = 'date'] | ||
2 | body: //div[@class = 'entry-content'] | ||
3 | strip://div[@class='more-ways'] | ||
4 | strip://div[@id = 'stayConnected'] | ||
5 | strip://p[child::a[@rel = 'bookmark']] | ||
6 | strip://p[starts-with(string(.),'(MORE:')] | ||
7 | strip://p[starts-with(string(.),'(PHOTOS:')] | ||
8 | move_into(//p[../@class = 'entry-content'][position() = last()])://div[@id = 'featbox'] | ||
9 | |||
10 | test_url: http://healthland.time.com/2011/07/24/amy-winehouse-and-the-pain-of-addiction/?preview=true&preview_id=39210&preview_nonce=0777d4e408 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/heise-online.mobi.txt b/inc/3rdparty/site_config/standard/heise-online.mobi.txt deleted file mode 100755 index daff6143..00000000 --- a/inc/3rdparty/site_config/standard/heise-online.mobi.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='content']/div | ||
2 | date: //p[@class='author_date']/span[@class='date'] | ||
3 | test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt deleted file mode 100755 index 9433104b..00000000 --- a/inc/3rdparty/site_config/standard/heise.de.txt +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | # Template should work well with either desktop or mobile version (m.heise.de) | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | title: //article/h1 | //h1 | ||
7 | date: //p[@class='news_datum'] | ||
8 | author: //h4[@class='author'] | ||
9 | |||
10 | body: //article | //div[@class='meldung_wrapper'] | ||
11 | |||
12 | # General cleanup | ||
13 | strip: //time | ||
14 | strip: //h4[@class='author'] | ||
15 | strip: //p[@class='news_datum'] | ||
16 | strip: //p[@class='artikel_datum'] | ||
17 | strip: //a[contains(@href, 'mailto')] | ||
18 | strip_id_or_class: comments | ||
19 | strip_id_or_class: ISI_IGNORE | ||
20 | strip_id_or_class: clear | ||
21 | |||
22 | strip_id_or_class: linkurl_grossbild | ||
23 | strip_id_or_class: image-num | ||
24 | strip_id_or_class: heisebox_right | ||
25 | strip_id_or_class: dossier | ||
26 | |||
27 | # Strip Ads | ||
28 | strip_id_or_class: ad_ | ||
29 | |||
30 | # Some optimizations | ||
31 | replace_string(<h5>): <h2> | ||
32 | replace_string(</h5>): </h2> | ||
33 | replace_string(<span class="bild_rechts"): <p | ||
34 | replace_string(<div class="heisebox">): <blockquote> | ||
35 | |||
36 | |||
37 | next_page_link: //a[@class='next'] | ||
38 | next_page_link: //a[@title='vor'] | ||
39 | |||
40 | test_url: http://www.heise.de/open/artikel/Die-Neuerungen-von-Linux-3-15-2196231.html | ||
41 | test_url: http://m.heise.de/open/artikel/Die-Neuerungen-von-Linux-3-15-2196231.html | ||
42 | test_url: http://www.heise.de/newsticker/meldung/Ueberwachungstechnik-Die-globale-Handy-Standortueberwachung-2301494.html | ||
diff --git a/inc/3rdparty/site_config/standard/hemmings.com.txt b/inc/3rdparty/site_config/standard/hemmings.com.txt deleted file mode 100755 index a02b4a62..00000000 --- a/inc/3rdparty/site_config/standard/hemmings.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@id='leftdetail'] | ||
3 | single_page_link: //a[contains(@href, 'printable=1')] | ||
4 | strip: //a[contains(., 'Full Version')] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.hemmings.com/classifieds/dealer/ferrari/330gtc/1601235.html | ||
9 | test_url: http://www.hemmings.com/rss/keyword.xml?adtype=carsforsale&make=ferrari \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/heroturko.me.txt b/inc/3rdparty/site_config/standard/heroturko.me.txt deleted file mode 100755 index 07b6adf1..00000000 --- a/inc/3rdparty/site_config/standard/heroturko.me.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[contains(@class, 'title')]//h1 | ||
2 | body: //div[contains(@class, 'story')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.heroturko.me/5223034-ds-catia-p3-v5-6r2014-gasp0-x86x64-multilanguage-english-docs.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hespress.com.txt b/inc/3rdparty/site_config/standard/hespress.com.txt deleted file mode 100755 index 4ed0b8b5..00000000 --- a/inc/3rdparty/site_config/standard/hespress.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] | ||
2 | |||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://hespress.com/videos/73684.html | ||
7 | test_url: http://hespress.com/permalink/73678.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hiamag.com.txt b/inc/3rdparty/site_config/standard/hiamag.com.txt deleted file mode 100755 index 3c7ba5ac..00000000 --- a/inc/3rdparty/site_config/standard/hiamag.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: (//div[contains(@class, 'gallery-slides')]//img)[1] | //div[contains(@class, 'node_body_inner')] | ||
2 | |||
3 | test_url: http://www.hiamag.com/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/highscalability.com.txt b/inc/3rdparty/site_config/standard/highscalability.com.txt deleted file mode 100755 index 5a808fa4..00000000 --- a/inc/3rdparty/site_config/standard/highscalability.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='journal-entry-text'] | ||
2 | |||
3 | test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hiperpop.com.txt b/inc/3rdparty/site_config/standard/hiperpop.com.txt deleted file mode 100755 index b5eb062e..00000000 --- a/inc/3rdparty/site_config/standard/hiperpop.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://hiperpop.com/2011/09/marc-anthony-celebra-su-cumpleanos-con-jennifer-lopez \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt deleted file mode 100755 index d869a866..00000000 --- a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class = 'pd'] | ||
2 | strip: //div[@id = 'overzicht-albumrecensies'] | ||
3 | strip: //div[@id = 'jc'] | ||
4 | test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/historytoday.com.txt b/inc/3rdparty/site_config/standard/historytoday.com.txt deleted file mode 100755 index 78fb60a6..00000000 --- a/inc/3rdparty/site_config/standard/historytoday.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body://div[@id = 'content'] | ||
2 | author://span[@class = 'authors'] | ||
3 | author://span[@class = 'ht-vtag'][1] | ||
4 | date:substring-before(//meta[@name = 'dc.date']/@content,'T') | ||
5 | strip://div[contains(@class, 'region-ubercontent')] | ||
6 | strip://h1 | ||
7 | strip://div[@id = 'ht-author'] | ||
8 | strip://ul[@class = 'links inline'] | ||
9 | strip://div[@id = 'ht-tools'] | ||
10 | test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hmercer.com.txt b/inc/3rdparty/site_config/standard/hmercer.com.txt deleted file mode 100755 index 2da13a8e..00000000 --- a/inc/3rdparty/site_config/standard/hmercer.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //*[@class='ptitle'] | ||
2 | date: //span[@class='date'] | ||
3 | body: //div[@class='body'] | ||
4 | prune: no | ||
5 | test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt deleted file mode 100755 index 975ffa26..00000000 --- a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | date: //meta[@name='sailthru.date']/@content | ||
2 | body: //article[contains(@class, 'entry-content')] | ||
3 | |||
4 | strip_image_src: subscribe.png | ||
5 | |||
6 | strip_id_or_class: wpcom-iframe-form | ||
7 | strip_id_or_class: gallery-thumbs | ||
8 | strip_id_or_class: twitter | ||
9 | strip_id_or_class: fb-link | ||
10 | strip_id_or_class: pinterest | ||
11 | |||
12 | strip: //div[@class='data'] | ||
13 | strip: //iframe[contains(@name, 'wpcom')] | ||
14 | |||
15 | find_string: <a href="http://www.youtube.com/subscription_center?add_user_id=2rJLq19N0dGrxfib80M | ||
16 | replace_string: </p></div></body></html><!-- | ||
17 | |||
18 | find_string: <h3>More | ||
19 | replace_string: </div></body></html><!-- | ||
20 | |||
21 | test_url: http://hollywoodlife.com/2013/10/04/miriam-carey-dead-capitol-hill-car-chase-shooting-postpartum-depression/ | ||
22 | test_url: http://hollywoodlife.com/feed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt deleted file mode 100755 index 8ed26ff5..00000000 --- a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id='entry-body'] | ||
2 | strip_id_or_class: paginate | ||
3 | strip: //p[contains(., 'Additional Resources')] | ||
4 | test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hosted.ap.org.txt b/inc/3rdparty/site_config/standard/hosted.ap.org.txt deleted file mode 100755 index a660f23b..00000000 --- a/inc/3rdparty/site_config/standard/hosted.ap.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content'] | ||
2 | tidy: no | ||
3 | strip_image_src: analytics.apnewsregistry | ||
4 | |||
5 | test_url: http://hosted.ap.org/dynamic/stories/E/EU_TURKEY_KURDS?SITE=KSNEW&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2014-10-14-10-50-25 | ||
diff --git a/inc/3rdparty/site_config/standard/howtogeek.com.txt b/inc/3rdparty/site_config/standard/howtogeek.com.txt deleted file mode 100755 index baa2ed4a..00000000 --- a/inc/3rdparty/site_config/standard/howtogeek.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[contains(@class, 'thecontent')] | ||
2 | |||
3 | strip_image_src: loading.gif | ||
4 | find_string:src="http://cdn.howtogeek.com/public/images/blank.gif" | ||
5 | replace_string:- | ||
6 | find_string:data-href= | ||
7 | replace_string:src= | ||
8 | |||
9 | strip_id_or_class: relatedside | ||
10 | |||
11 | test_url: http://www.howtogeek.com/school/microsoft-excel-formulas-and-functions/lesson1/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hs.fi.txt b/inc/3rdparty/site_config/standard/hs.fi.txt deleted file mode 100755 index 360dc725..00000000 --- a/inc/3rdparty/site_config/standard/hs.fi.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | prune: yes | ||
2 | tidy: yes | ||
3 | test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ht.ly.txt b/inc/3rdparty/site_config/standard/ht.ly.txt deleted file mode 100755 index 46535088..00000000 --- a/inc/3rdparty/site_config/standard/ht.ly.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //iframe[@id='hootFrame']/@src | ||
2 | |||
3 | test_url: http://ht.ly/bOiZV \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt deleted file mode 100755 index d4618c14..00000000 --- a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')] | ||
3 | date: //meta[@name="publish_date"]/@content | ||
4 | author: //a[@rel="author"] | ||
5 | author: //meta[@name="author"]/@content | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | strip: //footer | ||
11 | strip_id_or_class: ps-slideshow | ||
12 | strip_id_or_class: fs-slideshow | ||
13 | strip: //p[contains(., 'Related on HuffPost:')] | ||
14 | strip_id_or_class: contribute-story | ||
15 | strip_id_or_class: promo_holder | ||
16 | |||
17 | # end early | ||
18 | replace_string(<div class="sbm-main): </body></html><div class="not-interested | ||
19 | |||
20 | test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html | ||
21 | test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html | ||
diff --git a/inc/3rdparty/site_config/standard/humantransit.org.txt b/inc/3rdparty/site_config/standard/humantransit.org.txt deleted file mode 100755 index 92d3c678..00000000 --- a/inc/3rdparty/site_config/standard/humantransit.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h3[@class="entry-header"] | ||
2 | date: //h2[@class="date-header"] | ||
3 | body: //div[contains(@class, 'entry')] | ||
4 | |||
5 | test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt deleted file mode 100755 index 68fd220a..00000000 --- a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='HaberDetayTitleHold Title']/h1 | ||
2 | body: //div[@id='YazarDetayText'] | ||
3 | author: //div[@class='HaberDetayTitleHold Title']/h1 | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp | ||
7 | test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hvg.hu.txt b/inc/3rdparty/site_config/standard/hvg.hu.txt deleted file mode 100755 index 05e7b5f1..00000000 --- a/inc/3rdparty/site_config/standard/hvg.hu.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='pg-content']//h1 | ||
2 | body: //div[@id='articleBody0'] | ||
3 | replace_string(</table>): </table><br /><br /> | ||
4 | |||
5 | single_page_link: //div[@class="up-header"]/a | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hypebeast.com.txt b/inc/3rdparty/site_config/standard/hypebeast.com.txt deleted file mode 100755 index 23e47545..00000000 --- a/inc/3rdparty/site_config/standard/hypebeast.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1] | ||
2 | author: //span[@class='author']/a | ||
3 | |||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: paginator | ||
6 | strip_id_or_class: photo-number | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt deleted file mode 100755 index 3bda753c..00000000 --- a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | tidy:no | ||
2 | prune:no | ||
3 | |||
4 | body://div[contains(@id,'content')] | ||
5 | |||
6 | strip_id_or_class:meta | ||
7 | strip_id_or_class:notes | ||
8 | strip_id_or_class:pagination | ||
9 | test_url: http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/idealog.co.nz.txt b/inc/3rdparty/site_config/standard/idealog.co.nz.txt deleted file mode 100755 index ca88f606..00000000 --- a/inc/3rdparty/site_config/standard/idealog.co.nz.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@class='content'] | ||
2 | |||
3 | strip: //p[@class='dateline'] | ||
4 | strip: //hr | ||
5 | strip_id_or_class: share | ||
6 | strip_id_or_class: comments | ||
7 | strip_id_or_class: tags | ||
8 | |||
9 | title: substring-before(//title,' ::') | ||
10 | author: substring-before(//p[@class='dateline'],',') | ||
11 | date: //p[@class='dateline']/time | ||
12 | test_url: http://www.idealog.co.nz/blog/2012/12/geeks-plane-help-kiwis-take-san-francisco \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/idlewords.com.txt b/inc/3rdparty/site_config/standard/idlewords.com.txt deleted file mode 100755 index f3b33796..00000000 --- a/inc/3rdparty/site_config/standard/idlewords.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //a[@class='post_title'] | ||
2 | body: //div[@class='entrybox'] | ||
3 | strip_id_or_class: post_title | ||
4 | date: //div[@class='entrybox']/b[1] | ||
5 | strip: //div[@class='entrybox']/b[1] | ||
6 | author: string('Maciej Cegłowski') | ||
7 | test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/igeneration.fr.txt b/inc/3rdparty/site_config/standard/igeneration.fr.txt deleted file mode 100755 index 45dd5f25..00000000 --- a/inc/3rdparty/site_config/standard/igeneration.fr.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ') | ||
2 | date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- '))) | ||
3 | body: //div[@class='content clear-block zoneApple'] | ||
4 | |||
5 | test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt deleted file mode 100755 index 60635301..00000000 --- a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title://h1[@class='page-title'] | ||
2 | body://*[@id='content']//div[contains(@class,'node-content')] | ||
3 | |||
4 | author://*[@id='content']//div[contains(@class,'node-submitted')]/a | ||
5 | |||
6 | date:substring-after(//div[contains(@class,'node-submitted')],' on ') | ||
7 | test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ilounge.com.txt b/inc/3rdparty/site_config/standard/ilounge.com.txt deleted file mode 100755 index 9880b51f..00000000 --- a/inc/3rdparty/site_config/standard/ilounge.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | # Get proper Title, Author and Date info | ||
2 | title: substring-before(//title, '|') | ||
3 | author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By') | ||
4 | date: //span[@class='instapaper_date'] | ||
5 | |||
6 | # For Reviews & First Looks, get the intro paragraph and put it in front of the main body. | ||
7 | move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body'] | ||
8 | body: //div[@id='instapaper_para1'] | ||
9 | strip: //div[@class='reviewinfo'] | ||
10 | |||
11 | # We don't use footnotes, so why bother checking for them? | ||
12 | footnotes: no | ||
13 | test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt deleted file mode 100755 index 51a7eb9c..00000000 --- a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class='published visible e2-smart-title']//span | ||
2 | author: //span[@id='e2-blog-title'] | ||
3 | date: //p[@class='super-h'] | ||
4 | body: //div[@class='text published visible'] | ||
5 | test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inc.com.txt b/inc/3rdparty/site_config/standard/inc.com.txt deleted file mode 100755 index 5410e64e..00000000 --- a/inc/3rdparty/site_config/standard/inc.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | author: substring-after(substring-before(//div[@id='byline'],'|'),'By') | ||
2 | author: //div[@class='byline']/a | ||
3 | date: //span[@class='pubdate'] | ||
4 | # print friendly page | ||
5 | body: //div[@id='text'] | ||
6 | # regular page | ||
7 | body: //div[@id= 'articlecontent'] | ||
8 | |||
9 | strip: //div[@id= 'articlecontent']/h1 | ||
10 | strip: //div[@id='articlecontent']/p[@class='deck'] | ||
11 | strip: //div[@id='articlecontent']/div[@class='byline'] | ||
12 | strip: //div[@id='articlespacer'] | ||
13 | strip: //div[@id='incsharebox'] | ||
14 | strip: //div[@id='articlesidebar'] | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | single_page_link: //a[contains(@href, 'Printer_Friendly.html')] | ||
19 | strip: //a[contains(., 'Dig Deeper')] | ||
20 | test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html | ||
21 | test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt deleted file mode 100755 index af742209..00000000 --- a/inc/3rdparty/site_config/standard/independent.co.uk.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[contains(@class, 'articleContent')] | ||
3 | date: //meta[@property='article:published_time']/@content | ||
4 | author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] | ||
5 | |||
6 | strip_id_or_class: RelatedArtTag | ||
7 | |||
8 | tidy: no | ||
9 | test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/index.php b/inc/3rdparty/site_config/standard/index.php deleted file mode 100644 index a1b767fd..00000000 --- a/inc/3rdparty/site_config/standard/index.php +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | <?php | ||
2 | // this is here to prevent directory listing over the web | ||
3 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/indiatimes.com.txt b/inc/3rdparty/site_config/standard/indiatimes.com.txt deleted file mode 100755 index 8112105f..00000000 --- a/inc/3rdparty/site_config/standard/indiatimes.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //figure[@class='mainVideo'] | ||
2 | strip: //figcaption | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inessential.com.txt b/inc/3rdparty/site_config/standard/inessential.com.txt deleted file mode 100755 index 52252455..00000000 --- a/inc/3rdparty/site_config/standard/inessential.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class='weblogPost']/h3[1] | ||
2 | author: ("Brent Simmons") | ||
3 | date: //span[@class="weblogPostDisplayDate"] | ||
4 | body: //div[@class='weblogPostBody'] | ||
5 | test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/info.abril.com.br.txt b/inc/3rdparty/site_config/standard/info.abril.com.br.txt deleted file mode 100755 index dee69f80..00000000 --- a/inc/3rdparty/site_config/standard/info.abril.com.br.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title://h1 | ||
2 | body://div[@id='texto_link'] | ||
3 | |||
4 | test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infoq.com.txt b/inc/3rdparty/site_config/standard/infoq.com.txt deleted file mode 100755 index f4a328a6..00000000 --- a/inc/3rdparty/site_config/standard/infoq.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | body: //div[@id="intTranscript"] | ||
2 | body: //div[@class="box-content"] | ||
3 | title: //div[@class="box-content"]//h1[1] | ||
4 | author: //p[@class="info"]/strong | ||
5 | date: substring-before(substring-after(//p[@class="info"], "on"), "Length") | ||
6 | strip: //div[@class="box-content"]//h1[1] | ||
7 | strip: //div[@class="box-content"]//p[@class="info"] | ||
8 | strip_id_or_class: vendor-content-box | ||
9 | strip_id_or_class: tags2 | ||
10 | strip_id_or_class: instructions | ||
11 | strip_id_or_class: comments | ||
12 | strip_id_or_class: forum-list-tree | ||
13 | strip: //div[@class="addthis_toolbox addthis_default_style"] | ||
14 | test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informador.com.mx.txt b/inc/3rdparty/site_config/standard/informador.com.mx.txt deleted file mode 100755 index 77987493..00000000 --- a/inc/3rdparty/site_config/standard/informador.com.mx.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@class='tituloInt'] | ||
2 | body: //div[@class='notaPortada'] | ||
3 | strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota'] | ||
4 | date: //span[@class='publi'] | ||
5 | author: //span[@class='autor'] | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/information.dk.txt b/inc/3rdparty/site_config/standard/information.dk.txt deleted file mode 100755 index 3ade754d..00000000 --- a/inc/3rdparty/site_config/standard/information.dk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: //*[@property='dc:creator'] | ||
3 | date: //*[@property='dc:date']/@content | ||
4 | body: //div[@id='page-content']//div[contains(@class, 'article-body')] | ||
5 | |||
6 | tidy: no | ||
7 | test_url: http://www.information.dk/282307 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informationarchitects.net.txt b/inc/3rdparty/site_config/standard/informationarchitects.net.txt deleted file mode 100755 index 1330a040..00000000 --- a/inc/3rdparty/site_config/standard/informationarchitects.net.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title://h1[@class="post_title"] | ||
2 | body://article[@class="post"] | ||
3 | date://h1[@class="section_separator"] | ||
4 | author://span[@class="post_author"] | ||
5 | strip://nav[@class="arrow_nav"] | ||
6 | strip://section[@id="contact"] | ||
7 | strip_id_or_class:post_title | ||
8 | strip_id_or_class:post_author | ||
9 | strip_id_or_class:section_separator | ||
10 | test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt deleted file mode 100755 index 60b798e6..00000000 --- a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //head/title | ||
2 | body: //table[@id='table3']//div[@class='postContent'] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informit.com.txt b/inc/3rdparty/site_config/standard/informit.com.txt deleted file mode 100755 index 24bf6242..00000000 --- a/inc/3rdparty/site_config/standard/informit.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@id='content']/h1 | ||
2 | body: //div[@id="content"] | ||
3 | strip: //img[contains(@src, 'informit_printer.png')] | ||
4 | single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')] | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infoworld.com.txt b/inc/3rdparty/site_config/standard/infoworld.com.txt deleted file mode 100755 index d335bc4a..00000000 --- a/inc/3rdparty/site_config/standard/infoworld.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@id='main_text'] | ||
2 | title: //div[@id='main_text']/h1 | ||
3 | strip: //div[@id='main_text']/h1 | ||
4 | strip: //div[@id='main_text']/h2 | ||
5 | strip_id_or_class: tools | ||
6 | strip_id_or_class: articleTools | ||
7 | strip_id_or_class: pagination | ||
8 | strip_id_or_class: byline | ||
9 | strip_id_or_class: tweet | ||
10 | date: //div[@class='date'] | ||
11 | strip: //div[@class='date'] | ||
12 | test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infzm.com.txt b/inc/3rdparty/site_config/standard/infzm.com.txt deleted file mode 100755 index 489d5aff..00000000 --- a/inc/3rdparty/site_config/standard/infzm.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.infzm.com/content/71068 | ||
3 | # http://www.infzm.com/content/41577 | ||
4 | |||
5 | author://em[contains(@class, 'toAuthor')] | ||
6 | date:substring(//em[contains(@class, 'pubTime')],1) | ||
7 | body://section[contains(@id, 'articleContent')] | ||
8 | title://h1[contains(@class ,'articleHeadline clearfix')] | ||
9 | test_url: http://www.infzm.com/content/41577 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inhabitat.com.txt b/inc/3rdparty/site_config/standard/inhabitat.com.txt deleted file mode 100755 index c63f53a6..00000000 --- a/inc/3rdparty/site_config/standard/inhabitat.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # set body | ||
2 | body: //div[@class='post-listing'] | ||
3 | |||
4 | # remove clutter | ||
5 | strip: //a/big | ||
6 | strip: //a/em | ||
7 | strip: //p/em | ||
8 | test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/instagr.am.txt b/inc/3rdparty/site_config/standard/instagr.am.txt deleted file mode 100755 index 522caebc..00000000 --- a/inc/3rdparty/site_config/standard/instagr.am.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='caption'] | ||
2 | author: //p[@class='username'] | ||
3 | |||
4 | strip: //div[@class='contents']/h3 | ||
5 | strip: //div[@class='location'] | ||
6 | test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/interest.co.nz.txt b/inc/3rdparty/site_config/standard/interest.co.nz.txt deleted file mode 100755 index 28c3310a..00000000 --- a/inc/3rdparty/site_config/standard/interest.co.nz.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | test_url: http://www.interest.co.nz/opinion/opinion-when-our-fear-corporate-way-and-our-love-small-business-man-dangerous-thing \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iolanguage.com.txt b/inc/3rdparty/site_config/standard/iolanguage.com.txt deleted file mode 100755 index 231875ad..00000000 --- a/inc/3rdparty/site_config/standard/iolanguage.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //center/table | ||
2 | test_url: http://www.iolanguage.com/scm/io/docs/IoGuide.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ipadclub.nl.txt b/inc/3rdparty/site_config/standard/ipadclub.nl.txt deleted file mode 100755 index afe058df..00000000 --- a/inc/3rdparty/site_config/standard/ipadclub.nl.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt deleted file mode 100755 index dedb5572..00000000 --- a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt deleted file mode 100755 index 850a24e9..00000000 --- a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iphonehacks.com.txt b/inc/3rdparty/site_config/standard/iphonehacks.com.txt deleted file mode 100755 index e8ccea06..00000000 --- a/inc/3rdparty/site_config/standard/iphonehacks.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //meta[@name='og:title']/@content | ||
2 | body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')] | ||
3 | |||
4 | strip: //span[@vanilla-identifier] | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iplaysoft.com.txt b/inc/3rdparty/site_config/standard/iplaysoft.com.txt deleted file mode 100755 index 4a944768..00000000 --- a/inc/3rdparty/site_config/standard/iplaysoft.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@id='content']//div[@class='entry-banner' or @class='entry-content'] | ||
2 | test_url: http://www.iplaysoft.com/webbrowserpassview.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/isource.com.txt b/inc/3rdparty/site_config/standard/isource.com.txt deleted file mode 100755 index 215fdf87..00000000 --- a/inc/3rdparty/site_config/standard/isource.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | # Remove social buttons | ||
2 | strip: //div[@id='temp_Content_Right'] | ||
3 | |||
4 | # Remove duplicate article title | ||
5 | strip: //*[(@class='storytitle')] | ||
6 | test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itavisen.no.txt b/inc/3rdparty/site_config/standard/itavisen.no.txt deleted file mode 100755 index 3ba484a7..00000000 --- a/inc/3rdparty/site_config/standard/itavisen.no.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | author: //p[@class = 'writer'] | ||
2 | |||
3 | date: //p[@class = 'published-time'] | ||
4 | |||
5 | body: //div[@class = 'text main'] | ||
6 | test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt deleted file mode 100755 index 97f00ce8..00000000 --- a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[@id='cmsBody'] | ||
2 | |||
3 | next_page_link: //span[@id='next']/a | ||
4 | |||
5 | strip_id_or_class: cmsCopyright | ||
6 | strip_id_or_class: masterSocialbuttonBtm | ||
7 | |||
8 | test_url: http://www.itmedia.co.jp/enterprise/articles/0912/05/news002.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itstactical.com.txt b/inc/3rdparty/site_config/standard/itstactical.com.txt deleted file mode 100755 index b8cb461c..00000000 --- a/inc/3rdparty/site_config/standard/itstactical.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | body: //div[@class='format_text entry-content'] | ||
3 | author: //span[@class="author vcard"]/a | ||
4 | date: //abbr[@class="published"] | ||
5 | |||
6 | strip_id_or_class: related-posts | ||
7 | strip_id_or_class: membershipbox | ||
8 | strip_id_or_class: share_this_compact_bt | ||
9 | |||
10 | |||
11 | footnotes: no | ||
12 | test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itunes.apple.com.txt b/inc/3rdparty/site_config/standard/itunes.apple.com.txt deleted file mode 100755 index ffd95561..00000000 --- a/inc/3rdparty/site_config/standard/itunes.apple.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | body: //div[@id='left-stack' or contains(@class, 'center-stack')] | ||
2 | |||
3 | find_string: class="artwork" src=" | ||
4 | replace_string: class="artwork" src-disabled=" | ||
5 | find_string: src-swap-high-dpi=" | ||
6 | replace_string: src=" | ||
7 | |||
8 | strip_id_or_class: rating | ||
9 | strip_id_or_class: listeners-also-bought | ||
10 | |||
11 | prune: no | ||
12 | |||
13 | test_url: https://itunes.apple.com/us/rss/topaudiobooks/limit=10/xml | ||
14 | test_url: https://itunes.apple.com/us/audiobook/the-giver-unabridged/id356345850 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itwire.com.txt b/inc/3rdparty/site_config/standard/itwire.com.txt deleted file mode 100755 index 72b41065..00000000 --- a/inc/3rdparty/site_config/standard/itwire.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: //a[@rel="author"] | ||
2 | date: //li[@class="itemDateCreated"] | ||
3 | strip: //div[contains(@class, 'legend-rounded')] | ||
4 | |||
5 | test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands | ||
diff --git a/inc/3rdparty/site_config/standard/itworld.com.txt b/inc/3rdparty/site_config/standard/itworld.com.txt deleted file mode 100755 index 1ee0ee58..00000000 --- a/inc/3rdparty/site_config/standard/itworld.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //*[@id="article-title"] | ||
2 | author: //*[@id="article-info"]/strong | ||
3 | date: //*[@class="article-dateline"]/strong | ||
4 | body: //*[@id="article-content"] | ||
5 | test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/izismile.com.txt b/inc/3rdparty/site_config/standard/izismile.com.txt deleted file mode 100755 index b0114d35..00000000 --- a/inc/3rdparty/site_config/standard/izismile.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[starts-with(@id, 'news-id-')] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt deleted file mode 100755 index fc2eef8e..00000000 --- a/inc/3rdparty/site_config/standard/jalopnik.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | author: //span[@class='plus-icon'] | ||
2 | test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jandan.net.txt b/inc/3rdparty/site_config/standard/jandan.net.txt deleted file mode 100755 index 343fd6fb..00000000 --- a/inc/3rdparty/site_config/standard/jandan.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='content']//div[@class = 'post f'] | ||
2 | strip_id_or_class: comment-big | ||
3 | strip_id_or_class: avatar | ||
4 | strip: //div[@class='time_s'] | ||
5 | |||
6 | test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt deleted file mode 100755 index 00e4cf63..00000000 --- a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //p[contains(@class, 'author')]/a | ||
3 | date: //p[contains(@class, 'time')] | ||
4 | body: //div[@class='content']/div[contains(@class, 'text')] | ||
5 | |||
6 | # prevent "no text" errors on multi-page articles | ||
7 | tidy: no | ||
8 | |||
9 | # we use a custom next-link detector instead of the print view because | ||
10 | # it's pretty hard to strip out the unwanted parts in the print view | ||
11 | autodetect_next_page: no | ||
12 | next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] | ||
13 | |||
14 | strip: //h1 | ||
15 | |||
16 | strip_id_or_class: meta | ||
17 | strip_id_or_class: author | ||
18 | strip_id_or_class: paging | ||
19 | |||
20 | # prevent "Report an Error" from being recognized as footnote | ||
21 | footnotes: no | ||
22 | test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jjahnke.net.txt b/inc/3rdparty/site_config/standard/jjahnke.net.txt deleted file mode 100755 index d45c8899..00000000 --- a/inc/3rdparty/site_config/standard/jjahnke.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.jjahnke.net/rundbr87.html#2514 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt deleted file mode 100755 index 1dbe2072..00000000 --- a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@id='formatCont_en'] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt deleted file mode 100755 index 241a361f..00000000 --- a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html | ||
2 | |||
3 | author: substring-after(//div[@class="author"], 'by ') | ||
4 | date: //div[@class="date"] | ||
5 | |||
6 | ## Clean stuff at top ## | ||
7 | |||
8 | strip: //h1[1] | ||
9 | strip: //h2[1] | ||
10 | strip: //div[@class="date"] | ||
11 | strip: //div[@class="author"] | ||
12 | |||
13 | ## Clean stuff at bottom ## | ||
14 | |||
15 | strip: //blockquote[@class="textmessage"] | ||
16 | strip: //div[@style="width:500px"]/p[last()] | ||
17 | strip: //div[@style="width:500px"]/p[last()-1] | ||
18 | strip: //div[@style="width:500px"]/h4[last()] | ||
19 | strip: //div[@style="width:500px"]/h4[last()-1] | ||
20 | strip: //div[@style="width:500px"]/div[last()] | ||
21 | test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jouire.com.txt b/inc/3rdparty/site_config/standard/jouire.com.txt deleted file mode 100755 index 3cf60672..00000000 --- a/inc/3rdparty/site_config/standard/jouire.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: //h1 | ||
2 | date: //p[contains(@class,'date')] | ||
3 | test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/joystiq.com.txt b/inc/3rdparty/site_config/standard/joystiq.com.txt deleted file mode 100755 index 7a8e56f8..00000000 --- a/inc/3rdparty/site_config/standard/joystiq.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | author: //a[@class="byline-author"] | ||
2 | title: //h1[@class="headline"] | ||
3 | strip: //div[@id="info-card"] | ||
4 | strip: //div[@id="breaking-news"] | ||
5 | strip: //div[@class="rmod list-post-mod"] | ||
6 | strip: //div[@id="footer"] | ||
7 | strip: //div[@id="GH_strip"] | ||
8 | test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt deleted file mode 100755 index ff5a0244..00000000 --- a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | body: //div[@id='article_container'] | ||
2 | author: //h4//a[@class='author'] | ||
3 | title: //h1 | ||
4 | |||
5 | replace_string(lang="en"): lang="de" | ||
6 | replace_string(/>1</a>):/></a> | ||
7 | |||
8 | strip_id_or_class: share_toolbox | ||
9 | strip_id_or_class: article_header | ||
10 | strip_id_or_class: phototext | ||
11 | |||
12 | strip_image_src: icon_author.gif | ||
13 | |||
14 | strip: //img[@src=''] | ||
15 | strip: //h4[@id='author'] | ||
16 | |||
17 | prune: no | ||
18 | |||
19 | test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/juppy.org.txt b/inc/3rdparty/site_config/standard/juppy.org.txt deleted file mode 100755 index fdf7cdc9..00000000 --- a/inc/3rdparty/site_config/standard/juppy.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | convert_double_br_tags: yes | ||
2 | |||
3 | title: //div[@id="storycredits"]/p/span[@class="title"] | ||
4 | author: //div[@id="storycredits"]/p/br[1]/following-sibling::text() | ||
5 | |||
6 | strip: //div[@id="storycredits"] | ||
7 | |||
8 | test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kachestvo.ru.txt b/inc/3rdparty/site_config/standard/kachestvo.ru.txt deleted file mode 100755 index 535693c4..00000000 --- a/inc/3rdparty/site_config/standard/kachestvo.ru.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[contains(@class, 'inner_content')] | ||
2 | |||
3 | test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kachiblog.com.txt b/inc/3rdparty/site_config/standard/kachiblog.com.txt deleted file mode 100755 index 57ab0de1..00000000 --- a/inc/3rdparty/site_config/standard/kachiblog.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h3[contains(@class, 'entry-title')] | ||
2 | date: //abbr[@itemprop='datePublished']/@title | ||
3 | body: //div[@itemprop='articleBody'] | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.kachiblog.com/2013/05/samsung-galaxy-s4-vs-samsung-galaxy.html | ||
7 | test_url: http://www.kachiblog.com/feed | ||
diff --git a/inc/3rdparty/site_config/standard/kathimerini.gr.txt b/inc/3rdparty/site_config/standard/kathimerini.gr.txt deleted file mode 100755 index 2c7c518c..00000000 --- a/inc/3rdparty/site_config/standard/kathimerini.gr.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //td[contains(@class, 'articleTitlos')] | ||
2 | body: //td[contains(@class, 'eelantext')] | ||
3 | |||
4 | test_url: http://www.kathimerini.gr/4dcgi/_w_articles_kathremote_1_03/12/2013_530490 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kenrockwell.com.txt b/inc/3rdparty/site_config/standard/kenrockwell.com.txt deleted file mode 100755 index 90c64cbf..00000000 --- a/inc/3rdparty/site_config/standard/kenrockwell.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # Ads | ||
2 | strip: //table[@align="right"][@width="120"] | ||
3 | |||
4 | # Affiliate link paragraphs | ||
5 | strip: //a[.="Adorama"]/parent::p[contains(., "goodies")] | ||
6 | strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")] | ||
7 | test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kicker.de.txt b/inc/3rdparty/site_config/standard/kicker.de.txt deleted file mode 100755 index db4f63c4..00000000 --- a/inc/3rdparty/site_config/standard/kicker.de.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # set body | ||
2 | body: //div[@id='ovArtikel'] | ||
3 | |||
4 | # set title | ||
5 | title: //div[@id='ovArtikel']/h1 | ||
6 | # strip main title and leave sub title | ||
7 | strip: //div[@id='ovArtikel']/h1 | ||
8 | |||
9 | date: //div[@class='publicdate'] | ||
10 | |||
11 | #remove captions | ||
12 | strip: //*/div[@class='bu'] | ||
13 | strip: //*/div[@class='credit'] | ||
14 | |||
15 | #remove adds | ||
16 | strip: //*/div[@class='ad-head'] | ||
17 | strip: //*/div[@class='linksebay'] | ||
18 | |||
19 | # remove video content | ||
20 | strip: //*/div[@class='ovVideo'] | ||
21 | test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kickstarter.com.txt b/inc/3rdparty/site_config/standard/kickstarter.com.txt deleted file mode 100755 index 7b3daa58..00000000 --- a/inc/3rdparty/site_config/standard/kickstarter.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@id='name'] | ||
2 | body: //*[@id='leftcol'] | ||
3 | |||
4 | strip_id_or_class: 'share-box' | ||
5 | strip_id_or_class: 'project-faqs' | ||
6 | strip_id_or_class: 'report-issue-wrap' | ||
7 | test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt deleted file mode 100755 index b27539f5..00000000 --- a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //div[@class='post']/h2 | ||
2 | body: //div[@class='entry'] | ||
3 | strip: //p[contains(.,'Tags:')] | ||
4 | test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt deleted file mode 100755 index be439d75..00000000 --- a/inc/3rdparty/site_config/standard/kotaku.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | author: //span[@class="plus-icon"] | ||
2 | test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kottke.org.txt b/inc/3rdparty/site_config/standard/kottke.org.txt deleted file mode 100755 index 582f251c..00000000 --- a/inc/3rdparty/site_config/standard/kottke.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2 | ||
2 | author: //*[@id='main']/div/a[1] | ||
3 | date: substring-before(substring-after(//div[@class='meta'],'•'),'•') | ||
4 | body: //div[@id='main'] | ||
5 | strip: //div[@class='meta'] | ||
6 | test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kumailplus.com.txt b/inc/3rdparty/site_config/standard/kumailplus.com.txt deleted file mode 100755 index 2f604de0..00000000 --- a/inc/3rdparty/site_config/standard/kumailplus.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class = "entry-full"] | ||
2 | |||
3 | test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kumb.com.txt b/inc/3rdparty/site_config/standard/kumb.com.txt deleted file mode 100755 index fe350622..00000000 --- a/inc/3rdparty/site_config/standard/kumb.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[@id='centrediv']/h1 | ||
2 | |||
3 | author: substring-after(//div[@id='centrediv']/h3,'By: ') | ||
4 | |||
5 | date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ') | ||
6 | |||
7 | body: //div[@class='KonaBody'] | ||
8 | |||
9 | convert_double_br_tags: yes | ||
10 | test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt deleted file mode 100755 index cf4d3b8c..00000000 --- a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | date: //span[@class='datum'] | ||
2 | title: //div[@class='artikel']/h2 | ||
3 | body: //div[@class='entry'] | ||
4 | strip: //p[@class='tags'] | ||
5 | author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ') | ||
6 | strip: //div[@class='authorinfo'] | ||
7 | strip: //div[@class='authorpic'] | ||
8 | |||
9 | test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/landetsfria.se.txt b/inc/3rdparty/site_config/standard/landetsfria.se.txt deleted file mode 100755 index e5317a5a..00000000 --- a/inc/3rdparty/site_config/standard/landetsfria.se.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.landetsfria.se/artikel/112070 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt deleted file mode 100755 index d25999d0..00000000 --- a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[@class='article'] | ||
3 | strip: //div[@class='article']//h3[contains(@class, 'section')] | ||
4 | strip: //div[@class='article']//ul[contains(@class, 'article-actions')] | ||
5 | strip: //div[@id='syndication-upper'] | ||
6 | strip: //a[@id='syndication'] | ||
7 | strip: //dl[@id='article-tags'] | ||
8 | strip: //div[@id='article-like'] | ||
9 | prune: no | ||
10 | |||
11 | single_page_link: //li[@class='single-page']/a | ||
12 | |||
13 | test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt deleted file mode 100755 index 82374c0b..00000000 --- a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laquadrature.net.txt b/inc/3rdparty/site_config/standard/laquadrature.net.txt deleted file mode 100755 index 746bfca7..00000000 --- a/inc/3rdparty/site_config/standard/laquadrature.net.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@id='content-content']//div[@class='content'] | ||
2 | title: //h1[@class='title'] | ||
3 | date: substring-after(//*[@class='submitted'],'Submitted on') | ||
4 | tidy: no | ||
5 | strip: //div[@class='terms terms-inline'] | ||
6 | strip: //div[@class='more'] | ||
7 | strip: //div[@class='share-links'] | ||
8 | strip: //table[@id='attachments'] | ||
9 | |||
10 | test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt deleted file mode 100755 index 25e36543..00000000 --- a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #metadata | ||
2 | title: substring-before(//title,' |') | ||
3 | author: //a[contains(@class,'person') and starts-with(@href, '/contributor')] | ||
4 | |||
5 | #text | ||
6 | body: //div[contains(@class, 'article_body')] | ||
7 | |||
8 | #clean up | ||
9 | strip_id_or_class: recommended_section | ||
10 | |||
11 | test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc | ||
12 | test_url: http://lareviewofbooks.org/interview/souvenirs-future | ||
diff --git a/inc/3rdparty/site_config/standard/latimes.com.txt b/inc/3rdparty/site_config/standard/latimes.com.txt deleted file mode 100755 index b2db37bf..00000000 --- a/inc/3rdparty/site_config/standard/latimes.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | strip: //div[@id="tugs_story_display"] | ||
2 | strip: //div[@id="search_overlay"] | ||
3 | strip: //div[@id="adv_search"] | ||
4 | body: //div[@class='story'] | ||
5 | tidy: no | ||
6 | convert_double_br_tags: yes | ||
7 | single_page_link: //a[contains(@href, ',print.')] | ||
8 | strip: //p[starts-with(., 'latimes.com')] | ||
9 | strip: //h1[starts-with(., 'latimes.com')] | ||
10 | strip_id_or_class: cubead | ||
11 | test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laughingsquid.com.txt b/inc/3rdparty/site_config/standard/laughingsquid.com.txt deleted file mode 100755 index ab2f834f..00000000 --- a/inc/3rdparty/site_config/standard/laughingsquid.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/leancrew.com.txt b/inc/3rdparty/site_config/standard/leancrew.com.txt deleted file mode 100755 index e78cf7e6..00000000 --- a/inc/3rdparty/site_config/standard/leancrew.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id="content"]/h1[1] | ||
2 | date: substring-before(//p[@class="postdate"], ' at ') | ||
3 | author: ("Dr. Drang") | ||
4 | |||
5 | strip: //div[@id="content"]/h1[1] | ||
6 | strip: //p[@class="postdate"] | ||
7 | strip: //h2[@id="respond"] | ||
8 | strip: //blockquote[@class="bbpTweet"]/p/span/a/img | ||
9 | test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt deleted file mode 100755 index e720e377..00000000 --- a/inc/3rdparty/site_config/standard/lefigaro.fr.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //meta[@name='title']/@content | ||
2 | author: //span[@class='sign']//a[@class='journaliste'] | ||
3 | author: //meta[@name='author']/@content | ||
4 | body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] | ||
5 | date: //time[@pubdate]/@datetime | ||
6 | prune: no | ||
7 | test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php | ||
8 | test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lemonde.fr.txt b/inc/3rdparty/site_config/standard/lemonde.fr.txt deleted file mode 100755 index 097999b6..00000000 --- a/inc/3rdparty/site_config/standard/lemonde.fr.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | # We can have multiple authors | ||
4 | author: //a[@class='auteur'] | ||
5 | |||
6 | # Last edition date (if any) | ||
7 | date: //time[@itemprop='dateModified']/@datetime | ||
8 | # Publication date | ||
9 | date: //time[@itemprop='datePublished']/@datetime | ||
10 | |||
11 | |||
12 | body: //div[@id='articleBody'] | ||
13 | #Shoot the insane "conjugaison.lemonde.fr" links : | ||
14 | #strip: //a[contains(@class, 'conjug')] | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html | ||
diff --git a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt deleted file mode 100755 index 51e025ae..00000000 --- a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1/following::span[@class='fn'] | ||
2 | # Author: should stop parsing until <br> reached, but I don't know how to do this. | ||
3 | author: //following::div[@class='PDate2'] | ||
4 | date: //following::div[@class='PDate2']/strong | ||
5 | |||
6 | body: //div[@class='ArTexte'] | ||
7 | body: //div[@id='prod_txt_b'] | ||
8 | body: //div[@class='ArPhotoP'] | ||
9 | test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/letemps.ch.txt b/inc/3rdparty/site_config/standard/letemps.ch.txt deleted file mode 100755 index 49b019f9..00000000 --- a/inc/3rdparty/site_config/standard/letemps.ch.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h2 | ||
2 | strip_image_src: logo.gif | ||
3 | test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/libcom.org.txt b/inc/3rdparty/site_config/standard/libcom.org.txt deleted file mode 100755 index d1404d10..00000000 --- a/inc/3rdparty/site_config/standard/libcom.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | date: //span[contains(@class, 'page-date')] | ||
2 | body: //div[@id='node-page'] | ||
3 | strip_id_or_class: book-navigation | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1 | ||
7 | test_url: http://libcom.org/library-latest/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt deleted file mode 100755 index 0e1dceb1..00000000 --- a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h2[@class="entry-title"] | ||
2 | body: //div[@class="entry-content"] | ||
3 | test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lifehacker.co.uk.txt b/inc/3rdparty/site_config/standard/lifehacker.co.uk.txt deleted file mode 100755 index c540f7f3..00000000 --- a/inc/3rdparty/site_config/standard/lifehacker.co.uk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@itemprop='headline'] | ||
2 | body: //noscript/img | //div[@itemprop='text'] | ||
3 | author: //div[@class='meta meta--post']//a[@class='is-author'] | ||
4 | date: //div[@class='meta meta--post']//time/@datetime | ||
5 | |||
6 | test_url: http://www.lifehacker.co.uk/2014/08/22/dealhacker-10-google-chromecast-super-cheap-batteries-much | ||
7 | test_url: http://www.lifehacker.co.uk/2014/08/18/andrognito-hides-files-youd-like-keep-away-prying-eyes | ||
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt deleted file mode 100755 index ec97f06c..00000000 --- a/inc/3rdparty/site_config/standard/lifehacker.com.txt +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | # Adds author text: Gawker sites commonly show as "Author: View Profile" | ||
2 | author://a[@class="plus-icon modfont"] | ||
3 | |||
4 | # Add date and time | ||
5 | date: //span[@class="date"] | ||
6 | |||
7 | body: //div[contains(@class, 'marquee-asset-wrapper') or contains(@class, 'post-content')] | ||
8 | |||
9 | # Remove date and time from article text | ||
10 | strip: //span[@class="date"] | ||
11 | |||
12 | # Remove login/comment text | ||
13 | strip: //*[(@class="presence_control_external smalltype")] | ||
14 | |||
15 | strip: //div[@class="nodebyline modfont"] | ||
16 | |||
17 | # Remove right sidebar | ||
18 | strip: //div[@id="rightwrapper"] | ||
19 | |||
20 | # Remove print header | ||
21 | strip: //div[@id='printhead']/h1 | ||
22 | |||
23 | # Remove 'content is restricted' | ||
24 | strip: //div[@id='agegate_IDHERE'] | ||
25 | |||
26 | # Remove follow text | ||
27 | strip: //*[(@class="permalink_ads")] | ||
28 | |||
29 | strip_id_or_class: inset_groups | ||
30 | |||
31 | # Remove view/comment count | ||
32 | strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line'] | ||
33 | |||
34 | # Remove contact text | ||
35 | strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo'] | ||
36 | |||
37 | # Remove medium duplicates of the article image | ||
38 | strip_image_src: medium.jpg | ||
39 | |||
40 | # Remove "arrow" class at bottom of page | ||
41 | strip: //p[@class="arrow"] | ||
42 | |||
43 | # Remove "track" image from article body | ||
44 | strip: //img[@alt="track"] | ||
45 | test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos | ||
46 | test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse | ||
47 | test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt deleted file mode 100755 index 25d544ae..00000000 --- a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@class='singlePageTitle'] | ||
2 | |||
3 | strip: //p[contains(text(), 'Follow Us')] | ||
4 | strip: //p/strong[contains(text(), 'Recent Stories:')] | ||
5 | strip: //div[@id="sharefeature"] | ||
6 | |||
7 | test_url: http://lifestyle.inquirer.net/100223/dusting-your-ceiling-fan | ||
diff --git a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt deleted file mode 100755 index e09f6692..00000000 --- a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.lifeweek.com.cn/2012/1211/39439.shtml | ||
3 | # http://www.lifeweek.com.cn/2013/0308/40213.shtml | ||
4 | |||
5 | title:substring-before(//h1, '(') | ||
6 | title://h1 | ||
7 | date://ul[@class='authorbox']/li | ||
8 | author: substring-after(//ul[@class='authorbox']/li/following-sibling::li, '作者:') | ||
9 | |||
10 | next_page_link: //div[@class='pageturn_list']/a[@class='pagedown'] | ||
11 | body: //div[@class='original '] | ||
12 | |||
13 | strip://h1 | ||
14 | strip://ul[@class='authorbox'] | ||
15 | strip://span[@class='app_p'] | ||
16 | strip://div[@style='text-align:right;'] | ||
17 | strip://div[@class='pageturn_list'] | ||
18 | strip://div[@class='lifespeaks'] | ||
19 | strip://div[@class='vright fr'] | ||
20 | strip://div[@class='copyrt mg20'] | ||
21 | strip://div[@class='keyabout mg20'] | ||
22 | strip://ul[@class='readabout mg20'] | ||
23 | test_url: http://www.lifeweek.com.cn/2013/0308/40213.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/linkedin.com.txt b/inc/3rdparty/site_config/standard/linkedin.com.txt deleted file mode 100755 index 37e83cf6..00000000 --- a/inc/3rdparty/site_config/standard/linkedin.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | single_page_link: //ul[@class='util-nav']//a[@class='close'] | ||
2 | test_url: http://www.linkedin.com/news?actionBar=&articleID=894735221&ids=0Rdj4Qe3wQejwIczAOc3sRdzwUb3wScPoPdzkVe2MNcz8RcPsQejwIcPASdjwTcjwU&aag=true&freq=weekly \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/livescience.com.txt b/inc/3rdparty/site_config/standard/livescience.com.txt deleted file mode 100755 index 5275d34a..00000000 --- a/inc/3rdparty/site_config/standard/livescience.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | title: //div[@class="album_title"]//h1 | ||
2 | author: substring-before(//div[@class='by_line'], ',') | ||
3 | date: substring-after(substring-before(//div[@class="album_time"], ' Time'), 'Date: ') | ||
4 | body: //div[@class="about_text"] | ||
5 | |||
6 | strip: //div[@class='large_popper'] | ||
7 | strip: //span[contains(@id, 'mag_glass')] | ||
8 | strip: //span[contains(@class, 'img_overlay')] | ||
9 | strip: //td//span | ||
10 | strip: //div[@class="center_adsense"] | ||
11 | strip: //div[@class="article_info"]//div[@class='asset_section'] | ||
12 | strip: //div[@class="article_additional"] | ||
13 | strip: //div[contains(@style, 'overflow:hidden')] | ||
14 | strip: //div[@class="aa_text"] | ||
15 | strip: //div[@id='nointelliTXT'] | ||
16 | |||
17 | prune: no | ||
18 | autodetect_on_failure: no | ||
19 | |||
20 | test_url: http://www.livescience.com/34569-why-flowers-close-at-night-nyctinasty.html | ||
diff --git a/inc/3rdparty/site_config/standard/longform.org.txt b/inc/3rdparty/site_config/standard/longform.org.txt deleted file mode 100755 index 1310ec0d..00000000 --- a/inc/3rdparty/site_config/standard/longform.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //div[@class="post"]/div[@class="title"]/a | ||
2 | |||
3 | test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/loopinsight.com.txt b/inc/3rdparty/site_config/standard/loopinsight.com.txt deleted file mode 100755 index 730af947..00000000 --- a/inc/3rdparty/site_config/standard/loopinsight.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@class='container_16']//div[@class='grid_11'] | ||
2 | strip: //h2[@class='mast'] | ||
3 | strip: //div[@class='container_16']//div[@class='grid_11']/h1 | ||
4 | strip: //div[@class='container_16']//div[@class='grid_11']/p[1] | ||
5 | strip: //div[@class='container_16']//div[@class='grid_11']/div | ||
6 | author: //a[starts-with(@title, 'Posts by')] | ||
7 | date: substring-before(substring-after(//time, 'Posted on '), ' at') | ||
8 | test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/ | ||
9 | test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lostgarden.com.txt b/inc/3rdparty/site_config/standard/lostgarden.com.txt deleted file mode 100755 index d7eb0fa0..00000000 --- a/inc/3rdparty/site_config/standard/lostgarden.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | prune: no | ||
2 | convert_double_br_tags: yes | ||
3 | test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lovefm.com.txt b/inc/3rdparty/site_config/standard/lovefm.com.txt deleted file mode 100755 index 20d26c56..00000000 --- a/inc/3rdparty/site_config/standard/lovefm.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //*[@id='title'] | ||
2 | date: //*[@id='date'] | ||
3 | body: //*[@id='desc'] | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.lovefm.com/local_news.php?item=2176 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt deleted file mode 100755 index a71fccdd..00000000 --- a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[contains(@class, 'post')]//h1 | ||
2 | body: //div[contains(@class, 'post')] | ||
3 | strip: //hr | ||
4 | strip_id_or_class: post-meta | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.lovetv.com.bz/2013/06/28/recently-discovered-ancient-maya-wooden-canoe-paddle-to-be-handed-over-to-archaeology/ | ||
9 | test_url: http://www.lovetv.com.bz/feed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lrb.co.uk.txt b/inc/3rdparty/site_config/standard/lrb.co.uk.txt deleted file mode 100755 index f1aacb7d..00000000 --- a/inc/3rdparty/site_config/standard/lrb.co.uk.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[contains(@class, "article-body")]/hgroup/h1 | ||
2 | body: //div[contains(@class, "article-body")] | ||
3 | |||
4 | date: substring-after(//p[@class="meta-info"]/a, '· ') | ||
5 | |||
6 | author: //div[contains(@class, "article-body")]/hgroup/h2 | ||
7 | |||
8 | strip_id_or_class: print-hide | ||
9 | strip_id_or_class: books | ||
10 | |||
11 | test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened | ||
12 | test_url: http://www.lrb.co.uk/v36/n13/benjamin-kunkel/paupers-and-richlings | ||
diff --git a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt deleted file mode 100755 index b445f5eb..00000000 --- a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2 | ||
2 | |||
3 | body: // div[@id='content'] | ||
4 | |||
5 | strip: //div[@class='sidebar_wrapper'] | ||
6 | test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/luxuo.com.txt b/inc/3rdparty/site_config/standard/luxuo.com.txt deleted file mode 100755 index a3d5cb17..00000000 --- a/inc/3rdparty/site_config/standard/luxuo.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class='post-content'] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.luxuo.com/watches/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt deleted file mode 100755 index d1ff0b43..00000000 --- a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@class="story-body"]/div[@class="story-inner"]/h1 | ||
2 | body: //div[@class="story-body"] | ||
3 | date: //p[@class='date']/strong | ||
4 | author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') | ||
5 | |||
6 | strip: //div[@class="story-inner"]/div[@class="byline"] | ||
7 | |||
8 | test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.douban.com.txt b/inc/3rdparty/site_config/standard/m.douban.com.txt deleted file mode 100755 index ce9a3167..00000000 --- a/inc/3rdparty/site_config/standard/m.douban.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://m.douban.com/note/240776310/?session=6ac86d1e | ||
3 | # http://m.douban.com/note/208270705/?session=e00ec732_3433229 | ||
4 | |||
5 | title: //h2 | ||
6 | author: //a[@class='founder'] | ||
7 | date: substring-after(//span[@class='info'],' | ') | ||
8 | body: //div[contains(@class,'entry item')] | ||
9 | |||
10 | strip://span[contains(@class,'info')] | ||
11 | |||
12 | convert_double_br_tags: yes | ||
13 | test_url: http://m.douban.com/note/240776310/?session=6ac86d1e \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt b/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt deleted file mode 100644 index f5f0dfca..00000000 --- a/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //p[@class="txhead"] | ||
2 | author: //div[@class='txb'] | ||
3 | wrap_in(p): //div[@class='para'] | ||
4 | date: //div[@class='txb']/following-sibling::p/text()[substring(., 14)] | ||
5 | strip: //table[@class="tlogo"] | ||
6 | strip: //div[@class="cookieText"] | ||
7 | strip: //*[@class="sltb"] | ||
8 | strip: //*[@class="ijobs-x-link"] | ||
9 | strip: //*[@class="sponscolour"] | ||
10 | strip: //*[@class="sponsouter"] | ||
11 | strip: //div[@id="bottom-nav-block"]/following::* | ||
12 | test_url: http://m.guardian.co.uk/ms/p/gnm/op/s3OOwgO3yIhGuj41C1_S3Xg/view.m?id=15&gid=world/2012/jul/26/arctic-climate-change&cat=top-stories \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt deleted file mode 100755 index e47ce2ce..00000000 --- a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # Article Metadata | ||
2 | title: //h1 | ||
3 | author: //span[@class="name"]/a | ||
4 | date: //time | ||
5 | |||
6 | # Content Pruning | ||
7 | strip: //h5 | ||
8 | strip: //time | ||
9 | strip: //div[@class="byline"] | ||
10 | strip: //h2[@class="headline "] | ||
11 | test_url: http://m.vanityfair.com/politics/2012/10/michael-lewis-profile-barack-obama \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m00natic.github.io.txt b/inc/3rdparty/site_config/standard/m00natic.github.io.txt deleted file mode 100644 index 911fcbd0..00000000 --- a/inc/3rdparty/site_config/standard/m00natic.github.io.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=https%3A%2F%2Fm00natic.github.io%2Femacs%2Femacs-wiki.html | ||
4 | |||
5 | body: //div[@id='content'] | ||
6 | strip_id_or_class: table-of-contents | ||
7 | test_url: https://m00natic.github.io/emacs/emacs-wiki.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mac4ever.com.txt b/inc/3rdparty/site_config/standard/mac4ever.com.txt deleted file mode 100755 index 9999758b..00000000 --- a/inc/3rdparty/site_config/standard/mac4ever.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: substring-after(//div[@class='author'],'Par ') | ||
2 | date: //div[@class='date'] | ||
3 | body: //div[@class='content'] | ||
4 | |||
5 | test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macdrifter.com.txt b/inc/3rdparty/site_config/standard/macdrifter.com.txt deleted file mode 100755 index e57bd640..00000000 --- a/inc/3rdparty/site_config/standard/macdrifter.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: substring-before(//title,' « Macdrifter') | ||
2 | test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt deleted file mode 100755 index 522efb49..00000000 --- a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # Remove news feed | ||
2 | strip: //div[@id='news_feed_front'] | ||
3 | |||
4 | # Remove pull quote | ||
5 | strip: //div[@class='field field-type-text field-field-pull-quote'] | ||
6 | |||
7 | # Remove login | ||
8 | strip: //div[@class='right_bar_login'] | ||
9 | test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macgeneration.com.txt b/inc/3rdparty/site_config/standard/macgeneration.com.txt deleted file mode 100755 index 739eff4e..00000000 --- a/inc/3rdparty/site_config/standard/macgeneration.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le') | ||
2 | date: substring-after(//div[@class='dateNews'],' le ') | ||
3 | body: //div[@class='singleNews zoneApple'] | ||
4 | |||
5 | test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt deleted file mode 100755 index da7df695..00000000 --- a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # Remove sliders | ||
2 | strip: //*[(@class="slides_container")] | ||
3 | strip: //div[(@id="slides_two")] | ||
4 | |||
5 | # Remove tag cloud | ||
6 | strip: //span[(@class="secao")] | ||
7 | |||
8 | # Fix date article | ||
9 | # TODO | ||
10 | |||
11 | # Remove other stuff | ||
12 | strip: //div[(@id="idc-container")] | ||
13 | strip: //div[(@id="idc-noscript")] | ||
14 | strip: //div[(@class="linkwithin_div")] | ||
15 | strip: //div[(@class="navPosts")] | ||
16 | strip: //div[(@id="lateral")] | ||
17 | strip: //div[(@id="autor")] | ||
18 | strip: //div[(@id="rodape")] | ||
19 | strip: //div[(@id="post")]/h1 | ||
20 | strip: //div[(@id="post")]/div[(@id="boxInformacoes")] | ||
21 | test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macrumors.com.txt b/inc/3rdparty/site_config/standard/macrumors.com.txt deleted file mode 100755 index 83cfb4a6..00000000 --- a/inc/3rdparty/site_config/standard/macrumors.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | author: substring-after(//div[@class='byline'], " by ") | ||
2 | date: substring-before(//div[@class='byline'], " by ") | ||
3 | |||
4 | # set body | ||
5 | body: //div[@class='content'] | ||
6 | strip_id_or_class: commentsContainer | ||
7 | strip_id_or_class: linkback | ||
8 | |||
9 | # set title | ||
10 | title: //h3 | ||
11 | #strip: //div[@class='content']/h3 | ||
12 | test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ | ||
diff --git a/inc/3rdparty/site_config/standard/macstories.net.txt b/inc/3rdparty/site_config/standard/macstories.net.txt deleted file mode 100755 index 639fdd19..00000000 --- a/inc/3rdparty/site_config/standard/macstories.net.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | strip: //*[(@id = "featured")] | ||
2 | |||
3 | author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') | ||
4 | |||
5 | date: concat(//div[@class='month'],' ',//div[@class='day']) | ||
6 | |||
7 | #macstories doesn't provide a year, but month/day is better than nothing | ||
8 | test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mactalk.com.au.txt b/inc/3rdparty/site_config/standard/mactalk.com.au.txt deleted file mode 100755 index 9be865af..00000000 --- a/inc/3rdparty/site_config/standard/mactalk.com.au.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author://div[@class="article_username_container_full"] | ||
2 | date://div[@class="article_username_container"] | ||
3 | body://div[@class="article cms_clear restore postcontainer"] | ||
4 | test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mactechnews.de.txt b/inc/3rdparty/site_config/standard/mactechnews.de.txt deleted file mode 100755 index 5c03518a..00000000 --- a/inc/3rdparty/site_config/standard/mactechnews.de.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: substring-after(substring-after(//title, '>'), '>') | ||
2 | body: //div[@class='NewsArticleContent'] | ||
3 | test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macworld.com.txt b/inc/3rdparty/site_config/standard/macworld.com.txt deleted file mode 100755 index e7d97202..00000000 --- a/inc/3rdparty/site_config/standard/macworld.com.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | title: //article//h1 | ||
2 | date: //meta[@name="date"]/@content | ||
3 | author: //div[@class="author-name" or @class="article-byline"]/a[1] | ||
4 | |||
5 | body: //section[@class="page"] | ||
6 | |||
7 | # remove 'From the Lab' and 'Recent posts' text | ||
8 | strip: //div[@class='blogLabel'] | ||
9 | |||
10 | # remove byline and meta info | ||
11 | strip: //div[@class="article-meta"] | ||
12 | strip: //div[@class="author-info"] | ||
13 | |||
14 | #strip tags and categories | ||
15 | strip: //div[@class="department"] | ||
16 | |||
17 | #strip product cap links | ||
18 | strip: //div[@class="cap-main"] | ||
19 | strip: //div[@id="compare-lede"] | ||
20 | |||
21 | prune: no | ||
22 | |||
23 | # copes less well with Review pages, seems fine for News | ||
24 | test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mainichi.jp.txt b/inc/3rdparty/site_config/standard/mainichi.jp.txt deleted file mode 100755 index 414a2f53..00000000 --- a/inc/3rdparty/site_config/standard/mainichi.jp.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='NewsArticle'] | ||
2 | |||
3 | test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mainpost.de.txt b/inc/3rdparty/site_config/standard/mainpost.de.txt deleted file mode 100755 index 2f6382f1..00000000 --- a/inc/3rdparty/site_config/standard/mainpost.de.txt +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | title: substring-before(//title, '|') | ||
2 | body: //*[@id='content-left'] | ||
3 | |||
4 | # Why is this not working here? | ||
5 | # body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail'] | ||
6 | |||
7 | |||
8 | #Header | ||
9 | strip_id_or_class: 'subHead' | ||
10 | strip_id_or_class: 'fl_right' | ||
11 | strip_id_or_class: 'infolink' | ||
12 | strip_id_or_class: 'content-head' | ||
13 | strip_id_or_class: 'tab' | ||
14 | strip_id_or_class: 'tab-active' | ||
15 | strip: //*[contains(@class,'trenner')] | ||
16 | |||
17 | # Headline | ||
18 | strip: //h1/* | ||
19 | strip_id_or_class: 'font16' | ||
20 | |||
21 | #Images | ||
22 | strip_id_or_class: 'leftimage' | ||
23 | strip_id_or_class: 'rightimage' | ||
24 | |||
25 | #Comments | ||
26 | strip: //table | ||
27 | strip: //p/following-sibling::*[0] | ||
28 | test_url: http://www.mainpost.de/regional/wuerzburg/Autobahnschuetze-Staatsanwalt-fordert-zwoelf-Jahre;art492151,8386332 | ||
diff --git a/inc/3rdparty/site_config/standard/makeuseof.com.txt b/inc/3rdparty/site_config/standard/makeuseof.com.txt deleted file mode 100755 index 078e8d08..00000000 --- a/inc/3rdparty/site_config/standard/makeuseof.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | |||
3 | body: //article//header//img | //article//section[@class='post'] | ||
4 | |||
5 | strip: //article//section[@class='post']/aside | ||
6 | strip: //article//section[@class='post']/footer | ||
7 | |||
8 | test_url: http://www.makeuseof.com/tag/cool-websites-and-tools-advanced-photo-editor-keep-your-kids-stuff-online-identify-60-languages/ | ||
9 | test_url: http://www.makeuseof.com/tag/what-do-you-think-of-our-new-look-makeuseof-poll/ | ||
diff --git a/inc/3rdparty/site_config/standard/manager.co.th.txt b/inc/3rdparty/site_config/standard/manager.co.th.txt deleted file mode 100755 index cd6c5c01..00000000 --- a/inc/3rdparty/site_config/standard/manager.co.th.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | title: //td[@class="headline"] | ||
2 | author: //font[@color="#003366"] | ||
3 | date: //td[@class="date"] | ||
4 | |||
5 | strip: //td[@class="headline"] | ||
6 | strip: //font[@color="#003366"] | ||
7 | strip: //td[@class="date"] | ||
8 | |||
9 | strip: //img[@src="images/2009/logo_en.gif"] | ||
10 | |||
11 | body: //tbody[@class="body"] | ||
12 | convert_double_br_tags:yes | ||
13 | |||
14 | strip: //img[@src="/images/TabOver.gif"] | ||
15 | strip: //td[@width="160"] | ||
16 | strip: //img[@src="/images/TabUnder.gif"] | ||
17 | |||
18 | strip: //td[@class="small"] | ||
19 | strip: //td[@height="47"] | ||
20 | |||
21 | strip: //td[@valign="middle"] | ||
22 | strip: //td[@background="/images/menu_bottombg.gif"] | ||
23 | strip: //img[@src="/images/sc_footer_l.gif"] | ||
24 | strip: //img[@src="/images/sc_footer_m.gif"] | ||
25 | strip: //img[@src="/images/sc_footer_r.gif"] | ||
26 | test_url: http://www.manager.co.th/Entertainment/ViewNews.aspx?NewsID=9550000101979 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/marco.org.txt b/inc/3rdparty/site_config/standard/marco.org.txt deleted file mode 100755 index 4bb24a62..00000000 --- a/inc/3rdparty/site_config/standard/marco.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article//time[@pubdate] | ||
4 | title: //article/header/h2 | ||
5 | body: //article | ||
6 | strip: //header | ||
7 | test_url: http://www.marco.org/2012/09/08/businessweek-gruber | ||
8 | test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt deleted file mode 100755 index 0077f560..00000000 --- a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_id_or_class: wwsgd | ||
2 | test_url: http://www.marksdailyapple.com/are-detoxes-and-cleanses-safe-and-effective/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/martinfowler.com.txt b/inc/3rdparty/site_config/standard/martinfowler.com.txt deleted file mode 100755 index 4ff4a9c2..00000000 --- a/inc/3rdparty/site_config/standard/martinfowler.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | date: //div[@id="main"]/p[@class="date"] | ||
2 | author: string("Martin Fowler") | ||
3 | body: //div[@id="main"] | ||
4 | strip_id_or_class: date | ||
5 | strip_id_or_class: tags | ||
6 | strip_id_or_class: tagLabel | ||
7 | strip: //div[@id="main"]/h1[1] | ||
8 | test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt deleted file mode 100755 index b6efb6c5..00000000 --- a/inc/3rdparty/site_config/standard/mashable.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: substring-after(//span[@class='author_name'], 'By ') | ||
3 | date: //time | ||
4 | |||
5 | body: //article | ||
6 | strip: //div[@class='ytm-gallery-box'] | ||
7 | strip: //div[contains(@class, 'adsense')] | ||
8 | strip: //aside[contains(@class, 'social')] | ||
9 | strip_id_or_class: article-topics | ||
10 | |||
11 | test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/ | ||
diff --git a/inc/3rdparty/site_config/standard/matt.might.net.txt b/inc/3rdparty/site_config/standard/matt.might.net.txt deleted file mode 100755 index 30d585cf..00000000 --- a/inc/3rdparty/site_config/standard/matt.might.net.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: string("Matt Might") | ||
3 | strip: //h1/following-sibling::div | ||
4 | |||
5 | test_url: http://matt.might.net/articles/oo-cesk/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mattcutts.com.txt b/inc/3rdparty/site_config/standard/mattcutts.com.txt deleted file mode 100755 index 76b1eac6..00000000 --- a/inc/3rdparty/site_config/standard/mattcutts.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | date: //*[@class = 'published'] | ||
2 | test_url: http://www.mattcutts.com/blog/internet-censorship-sopa/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mbl.is.txt b/inc/3rdparty/site_config/standard/mbl.is.txt deleted file mode 100755 index fd26f091..00000000 --- a/inc/3rdparty/site_config/standard/mbl.is.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[class="frett-main"] | ||
2 | test_url: http://mbl.is/frettir/innlent/2012/02/21/litill_munur_a_fargjaldaverdi/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/medialens.org.txt b/inc/3rdparty/site_config/standard/medialens.org.txt deleted file mode 100755 index c26bac55..00000000 --- a/inc/3rdparty/site_config/standard/medialens.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip_id_or_class: article-tools | ||
2 | strip_id_or_class: pagenav | ||
3 | prune: no | ||
4 | test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html | ||
5 | test_contains: In an era of permanent war, economic meltdown | ||
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt deleted file mode 100755 index 9e9c6895..00000000 --- a/inc/3rdparty/site_config/standard/medium.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[contains(@class, 'postContent-inner')] | ||
2 | strip_id_or_class: supplementalPostContent | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: https://medium.com/@savolai/kaytettavyyden-haasteet-keskustelukulttuurista-2-3-6844c0d7893b | ||
7 | test_contains: Jos käytettävyysongelmat ovat kerran niin tyypillisiä | ||
8 | test_contains: Keskustelukulttuuriongelmasta (subjective vs. objective bugs) | ||
9 | |||
10 | test_url: https://medium.com/health-the-future/thirty-things-ive-learned-482765ee3503 | ||
11 | test_contains: Remember you will die | ||
12 | test_contains: You have to have some faith. | ||
diff --git a/inc/3rdparty/site_config/standard/megamp3.eu.txt b/inc/3rdparty/site_config/standard/megamp3.eu.txt deleted file mode 100755 index 1b6a1279..00000000 --- a/inc/3rdparty/site_config/standard/megamp3.eu.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h3[@class='episode_title'] | ||
2 | body: //ul[contains(@class, 'episode_imgdesc')]/li/descendant::* | ||
3 | prune: no | ||
4 | strip://*[contains(@class, 'plugin')] | ||
5 | strip://*[contains(@class, 'episode_keywords')] | ||
6 | |||
7 | test_url: http://www.megamp3.eu/?p=episode&name=2013-04-19_la_filiere_progressive_431.mp3 | ||
8 | test_url: http://www.megamp3.eu/feed.xml | ||
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt b/inc/3rdparty/site_config/standard/menshealth.com.sg.txt deleted file mode 100755 index 6a669253..00000000 --- a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | strip: //div[contains(@style, 'float:right') and contains(., 'advertisement')] | ||
2 | body: //div[@style="float:left;width:740px;"] | ||
3 | |||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.menshealth.com.sg/fitness/mh-picks-under-armour-clutchfit-nitro-mid-cleats | ||
7 | test_contains: These cleats are made for one thing | ||
8 | |||
9 | test_url: http://www.menshealth.com.sg/fitness/top-10-fat-burning-bodyweight-moves-you-can-do-10-minutes | ||
10 | test_contains: let this workout fool you | ||
11 | |||
12 | test_url: http://www.menshealth.com.sg/fitness/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.txt b/inc/3rdparty/site_config/standard/menshealth.com.txt deleted file mode 100755 index a1a46f63..00000000 --- a/inc/3rdparty/site_config/standard/menshealth.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | # need to find a way to eliminate <span> content for "related content" without eliminating important content | ||
2 | |||
3 | convert_double_br_tags: [yes] | ||
4 | #body: //div[@id='leftside'] | ||
5 | title: //h1 | ||
6 | title: //h2 | ||
7 | Author: substring-after(//h4, 'By ') | ||
8 | Author: substring-after(//h4, 'By: ') | ||
9 | #Strip: //span | ||
10 | strip_id_or_class: morefromcat | ||
11 | strip_id_or_class: mostpopular | ||
12 | strip_id_or_class: articlepagination | ||
13 | strip_id_or_class: toolbar | ||
14 | body: //div[@id='zmodcontent'] | ||
15 | single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')] | ||
16 | test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/metafilter.com.txt b/inc/3rdparty/site_config/standard/metafilter.com.txt deleted file mode 100755 index a2f3ada9..00000000 --- a/inc/3rdparty/site_config/standard/metafilter.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[contains(@class, 'copy') or contains(@class, 'comments')] | ||
2 | strip_id_or_class: related | ||
3 | strip: //a[. = 'Subscribe'] | ||
4 | strip: //h1/span[@class = 'smallcopy'] | ||
5 | strip: //a[@class = 'skip'] | ||
6 | strip: //div[@id = 'logo'] | ||
7 | strip: //div[contains(@class, 'comments') and contains(., 'You are not currently logged in')] | ||
8 | test_url: http://www.metafilter.com/128101/Probably-more-secure-than-the-Drafts-folder-on-a-shared-Gmail-account \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt deleted file mode 100755 index c295d734..00000000 --- a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: (//td[starts-with(@id, 'postmessage_')])[1] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033 | ||
6 | test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mikeash.com.txt b/inc/3rdparty/site_config/standard/mikeash.com.txt deleted file mode 100755 index abaa6a81..00000000 --- a/inc/3rdparty/site_config/standard/mikeash.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class="blogtitle"] | ||
2 | strip: //div[@class="blogtitle"] | ||
3 | |||
4 | author: substring-after(//span[@class="blogheader"], 'Author: ') | ||
5 | test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mikeindustries.com.txt b/inc/3rdparty/site_config/standard/mikeindustries.com.txt deleted file mode 100755 index fb4636cc..00000000 --- a/inc/3rdparty/site_config/standard/mikeindustries.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@class='post_content']/h2 | ||
2 | date: //div[@class='dateline'] | ||
3 | body: //div[@class='entry'] | ||
4 | |||
5 | strip: //div[@class='closer'] | ||
6 | strip: //div[@class='navigation'] | ||
7 | strip: //div[@class='aux_pane'] | ||
8 | strip: //div[@class='aux_aux_pane'] | ||
9 | test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt deleted file mode 100755 index 773a627c..00000000 --- a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //*[@class="article"]/h1 | ||
2 | date: //*[@class="article"]/div[@class="date"] | ||
3 | |||
4 | # strip the title and date from the article text | ||
5 | strip: //*[@class="article"]/h1 | ||
6 | strip: //*[@class="article"]/div[@class="date"] | ||
7 | |||
8 | # strip annoying <br> between metadata and article | ||
9 | strip: //*[@class="article"]/div[@class="date"]/following-sibling::br | ||
10 | test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/minnpost.com.txt b/inc/3rdparty/site_config/standard/minnpost.com.txt deleted file mode 100755 index dc926a6f..00000000 --- a/inc/3rdparty/site_config/standard/minnpost.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //*[@id="content-header"]/h1 | ||
2 | author: //*[contains(@class, 'byline')]/a/text() | ||
3 | date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|') | ||
4 | body: //*[contains(@class, 'node-body')] | ||
5 | test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt deleted file mode 100755 index 2033cf33..00000000 --- a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | # Remove extra links | ||
2 | strip: //*[@class='appended_html'] | ||
3 | test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mises.org.txt b/inc/3rdparty/site_config/standard/mises.org.txt deleted file mode 100755 index 73c485e6..00000000 --- a/inc/3rdparty/site_config/standard/mises.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip_id_or_class: 'book-ad' | ||
2 | strip_id_or_class: 'bigger pullquote' | ||
3 | strip_id_or_class: 'subscribe' | ||
4 | strip_id_or_class: 'blog-link' | ||
5 | test_url: http://mises.org/daily/4804 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt deleted file mode 100755 index 765fab3f..00000000 --- a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //h1[@class='article-headline'] | ||
2 | date: //span[@class='timeStamp'] | ||
3 | author: substring-before(//p[@class='article-byline'], '/') | ||
4 | body: //div[@id='article'] | ||
5 | #strip: //div[@class='inner'] | ||
6 | strip: //div[@id='article_head'] | ||
7 | strip: //p[@class='tagLine'] | ||
8 | strip: //div[@id='article_related_links'] | ||
9 | strip: //div[@id='article_related_mlb'] | ||
10 | strip: //span[@class='more'] | ||
11 | strip: //div[@class='article_component'] | ||
12 | strip: //span[@class='screen_reader'] | ||
13 | strip: //ul[@class='columnists_blurb'] | ||
14 | test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt deleted file mode 100755 index 8480e302..00000000 --- a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //h1[@id = 'stream_title'] | ||
2 | author: //p[@class = 'byline']/a | ||
3 | date: //span[@class = 'datetime'] | ||
4 | |||
5 | body: //div[@id = 'stream_container'] | ||
6 | strip: //p[@class = 'byline'] | ||
7 | strip_id_or_class: stream_summary | ||
8 | strip_id_or_class: social-spoken | ||
9 | strip_id_or_class: datetime | ||
10 | strip_id_or_class: author-mini-profile | ||
11 | strip_id_or_class: social-tools | ||
12 | strip_id_or_class: entry-tags | ||
13 | strip_id_or_class: fb-like-box | ||
14 | test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlssoccer.com.txt b/inc/3rdparty/site_config/standard/mlssoccer.com.txt deleted file mode 100755 index 5d706f88..00000000 --- a/inc/3rdparty/site_config/standard/mlssoccer.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //*[@class="header_title"]/h1 | ||
2 | date: //*[@class="field-date"] | ||
3 | author: //*[@class="field-author"] | ||
4 | body: //div[contains(@class, 'content')] | ||
5 | |||
6 | test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mmo-champion.com.txt b/inc/3rdparty/site_config/standard/mmo-champion.com.txt deleted file mode 100755 index 50d8a24f..00000000 --- a/inc/3rdparty/site_config/standard/mmo-champion.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id = 'article_content']/div[contains(@class,'article')] | ||
3 | author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')] | ||
4 | date: //div[@class = 'article_username_container'] | ||
5 | test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mnn.com.txt b/inc/3rdparty/site_config/standard/mnn.com.txt deleted file mode 100755 index d3576df2..00000000 --- a/inc/3rdparty/site_config/standard/mnn.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | tidy: no | ||
2 | author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text() | ||
3 | date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2] | ||
4 | body: //div[@class="node"] | ||
5 | |||
6 | strip_id_or_class: vertical-social-bar | ||
7 | strip_id_or_class: blogs_paginator | ||
8 | strip_id_or_class: horizontal-social-links | ||
9 | strip_id_or_class: servicelinksdiv | ||
10 | |||
11 | test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mno.hu.txt b/inc/3rdparty/site_config/standard/mno.hu.txt deleted file mode 100755 index 8a3f9391..00000000 --- a/inc/3rdparty/site_config/standard/mno.hu.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //title | ||
2 | |||
3 | author: //div[@class="author"] | ||
4 | |||
5 | strip_id_or_class: 'header' | ||
6 | strip_id_or_class: 'cikk_ajanlo' | ||
7 | strip_id_or_class: 'buttons' | ||
8 | strip_id_or_class: 'related' | ||
9 | strip_id_or_class: 'adbox ad_cikk_kozepre' | ||
10 | strip_id_or_class: 'cikk-cimkek' | ||
11 | strip_id_or_class: 'cikk_ertekeles' | ||
12 | |||
13 | strip_comments: yes | ||
14 | test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt deleted file mode 100755 index c60252ef..00000000 --- a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'headline')] | ||
2 | body: //article[contains(@class, 'full-art')] | ||
3 | strip_id_or_class: image-credit | ||
4 | test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mobile.slate.com.txt b/inc/3rdparty/site_config/standard/mobile.slate.com.txt deleted file mode 100755 index 6ffcd18f..00000000 --- a/inc/3rdparty/site_config/standard/mobile.slate.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h2[@class="article_title"] | ||
2 | strip: //a[@class="houseAdLink"] | ||
3 | strip: //h1 | ||
4 | strip: //div[@class="more_articles"] | ||
5 | test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt deleted file mode 100755 index 82da4aec..00000000 --- a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class='post uncustomized-post-template'] | ||
2 | |||
3 | # remove duplicate of post title, which is a link | ||
4 | strip: //h3[@class='post-title'] | ||
5 | |||
6 | # remove permalink and timestamp, which isn't useful as it's a time with no date | ||
7 | strip: //span[@class='post-timestamp'] | ||
8 | |||
9 | # remove labels (tags) | ||
10 | strip: //span[@class='post-labels'] | ||
11 | test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/modernghana.com.txt b/inc/3rdparty/site_config/standard/modernghana.com.txt deleted file mode 100755 index 306ef8d9..00000000 --- a/inc/3rdparty/site_config/standard/modernghana.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | author: //meta[@name="author"]/@content | ||
3 | date: //span[@class='date1'] | ||
4 | body: //div[@id='newsimage'] | //div[@id='bodytext'] | ||
5 | tidy: no | ||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/money.cnn.com.txt b/inc/3rdparty/site_config/standard/money.cnn.com.txt deleted file mode 100755 index d5e03d20..00000000 --- a/inc/3rdparty/site_config/standard/money.cnn.com.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | title: //h1[@class='storyheadline'] | ||
3 | author: //meta[@name="AUTHOR"]/@content | ||
4 | date: //span[@class='cnnDateStamp'] | ||
5 | date: //meta[@name="DATE"]/@content | ||
6 | body: //div[@id='storytext' or @class='storytext'] | ||
7 | |||
8 | strip_id_or_class: ie_column | ||
9 | strip_id_or_class: sharewidgets | ||
10 | strip_image_src: bug.gif | ||
11 | |||
12 | strip: //div[@class="hed_side"] | ||
13 | strip: //span[@class="byline"] | ||
14 | strip: //a[@class="soc-twtname"] | ||
15 | strip: //span[@class="cnnDateStamp"] | ||
16 | strip: //div[@class="storytimestamp"] | ||
17 | strip: //div[@class="cnnCol_side"] | ||
18 | |||
19 | prune: no | ||
20 | tidy: no | ||
21 | |||
22 | test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 | ||
23 | test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm | ||
24 | test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/monkeyzen.com.txt b/inc/3rdparty/site_config/standard/monkeyzen.com.txt deleted file mode 100755 index f779c38e..00000000 --- a/inc/3rdparty/site_config/standard/monkeyzen.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://monkeyzen.com/2011/09/siluetas-de-clasicos-a-modo-de-vinilos \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt deleted file mode 100644 index 24c949e9..00000000 --- a/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='header']//h1[1] | ||
2 | |||
3 | body: //div[@id='content'] | ||
4 | |||
5 | strip_id_or_class: toc | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://moo.nac.uci.edu/~hjm/HOWTO_move_data.html | ||
diff --git a/inc/3rdparty/site_config/standard/moonsault.de.txt b/inc/3rdparty/site_config/standard/moonsault.de.txt deleted file mode 100755 index 55026eeb..00000000 --- a/inc/3rdparty/site_config/standard/moonsault.de.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | strip_image_src: menu | ||
2 | strip_image_src: templates | ||
3 | strip: //div/a | ||
4 | strip: //div/b | ||
5 | strip: //div/strong | ||
6 | strip: //td[@width='30%'] | ||
7 | strip: //br[1] | ||
8 | strip: //br[2] | ||
9 | strip: //br[3] | ||
10 | strip: //br[4] | ||
11 | strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home'] | ||
12 | strip_id_or_class: cse-branding-right | ||
13 | test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt deleted file mode 100755 index 780cca4f..00000000 --- a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@class='print-title'] | ||
2 | body: //div[@class='print-submitted' or @class='print-created' or @class='print-content'] | ||
3 | prune: no | ||
4 | |||
5 | single_page_link: //li[@class='print']/a | ||
6 | |||
7 | test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt deleted file mode 100755 index c6312c0e..00000000 --- a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: //span[@class="author"]/a | ||
2 | date: //span[@class="date"] | ||
3 | body: //div[@class="story-content"] | ||
4 | strip: //aside | ||
5 | test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mothering.com.txt b/inc/3rdparty/site_config/standard/mothering.com.txt deleted file mode 100755 index a34adff7..00000000 --- a/inc/3rdparty/site_config/standard/mothering.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h2[contains(@class,'post_headline')] | ||
2 | body: //div[@class='entry'] | ||
3 | convert_double_br_tags: yes | ||
4 | strip_image_src: _selected.gif | ||
5 | strip_id_or_class: addthis_ | ||
6 | strip: //a[contains(@href,'feedburner.com')] | ||
7 | test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motherjones.com.txt b/inc/3rdparty/site_config/standard/motherjones.com.txt deleted file mode 100755 index 851feb7e..00000000 --- a/inc/3rdparty/site_config/standard/motherjones.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id = 'content-area'] | ||
3 | next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')] | ||
4 | tidy: no | ||
5 | author: //p[contains(@class, 'byline')]/a | ||
6 | |||
7 | strip_id_or_class: node-header | ||
8 | strip_id_or_class: hdr-tools | ||
9 | strip_id_or_class: node-body-break | ||
10 | strip_id_or_class: pullquote | ||
11 | strip_id_or_class: node-pager | ||
12 | strip_id_or_class: author-bio | ||
13 | strip_id_or_class: node-footer | ||
14 | |||
15 | test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motorfull.com.txt b/inc/3rdparty/site_config/standard/motorfull.com.txt deleted file mode 100755 index c6bec7e9..00000000 --- a/inc/3rdparty/site_config/standard/motorfull.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://motorfull.com/2011/09/aparca-valeo-park4u-remote \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/movie.douban.com.txt b/inc/3rdparty/site_config/standard/movie.douban.com.txt deleted file mode 100755 index eae211ed..00000000 --- a/inc/3rdparty/site_config/standard/movie.douban.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://movie.douban.com/review/1062013/ | ||
3 | |||
4 | title: //span[contains(@property, 'v:summary')] | ||
5 | author: //span[contains(@property, 'v:reviewer')] | ||
6 | date://span[contains(@property, 'v:dtreviewed')] | ||
7 | body://div[contains(@class, 'main-bd')] | ||
8 | |||
9 | strip://img[contains(@class,'rating')]|//img[contains(@class,'review-stat')] | ||
10 | convert_double_br_tags: yes | ||
11 | test_url: http://movie.douban.com/review/1062013/ | ||
12 | test_url: http://movie.douban.com/review/1021870/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt deleted file mode 100755 index 7a284275..00000000 --- a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[class="mainBody"] | ||
2 | footnotes: no | ||
3 | test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt deleted file mode 100755 index f008d2d1..00000000 --- a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | title: //title | ||
2 | author: //div[@id='byline'] | ||
3 | |||
4 | date: //div[contains(@class,'timestamp')]/abbr/text() | ||
5 | |||
6 | body: //div[@id='intellitTXT'] | ||
7 | |||
8 | strip: //div[@id='byline'] | ||
9 | strip: //div[contains(@class,'timestamp')] | ||
10 | strip: //div[contains(@class, 'ad-label')] | ||
11 | strip: //div[contains(@class, 'ad-break')] | ||
12 | strip: //span[contains(@class, 'x-video')] | ||
13 | strip: //span[contains(@class, 'inline')] | ||
14 | strip: //div[contains(@class, 'video')] | ||
15 | strip: //div[contains(@class, 'discuss')] | ||
16 | strip: //div[@id='most-popular'] | ||
17 | strip: //div[contains(@class,'drawer')] | ||
18 | strip: //*[contains(@class, 'hide')] | ||
19 | |||
20 | footnotes: no | ||
21 | test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt deleted file mode 100755 index 8a7590ab..00000000 --- a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@id='WNStoryBody'] | ||
2 | author: //div[@id='WNStoryByline'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.myfoxatlanta.com/category/233685/local-news?clienttype=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/myfoxboston.com.txt b/inc/3rdparty/site_config/standard/myfoxboston.com.txt deleted file mode 100755 index 9ad8ce05..00000000 --- a/inc/3rdparty/site_config/standard/myfoxboston.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"] | ||
2 | tidy: no | ||
3 | |||
4 | test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/myrecipes.com.txt b/inc/3rdparty/site_config/standard/myrecipes.com.txt deleted file mode 100755 index 956be1e6..00000000 --- a/inc/3rdparty/site_config/standard/myrecipes.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h2[contains(@class, 'name')] | ||
2 | body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')] | ||
3 | |||
4 | strip_id_or_class: photoBy | ||
5 | strip_id_or_class: link | ||
6 | |||
7 | single_page_link: //li[@class='print']/a[contains(@href, '/print/')] | ||
8 | |||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/narenji.ir.txt b/inc/3rdparty/site_config/standard/narenji.ir.txt deleted file mode 100755 index 6c3d0c24..00000000 --- a/inc/3rdparty/site_config/standard/narenji.ir.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='node'] | ||
2 | test_url: http://www.narenji.ir/2806 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nasa.gov.txt b/inc/3rdparty/site_config/standard/nasa.gov.txt deleted file mode 100755 index 7df1112b..00000000 --- a/inc/3rdparty/site_config/standard/nasa.gov.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@class='address']/span | ||
2 | author: substring-before(//span[@class='credits'],',') | ||
3 | date: //div[@class='promodatepress']/span | ||
4 | body: //div[@class='default_style_wrap'] | ||
5 | strip: //div[@class='text_adjust'] | ||
6 | strip: //div[@class='skiplink'] | ||
7 | strip: //h2 | ||
8 | test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nbweekly.com.txt b/inc/3rdparty/site_config/standard/nbweekly.com.txt deleted file mode 100755 index 2645d406..00000000 --- a/inc/3rdparty/site_config/standard/nbweekly.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | date://span[contains(@class,'date')] | ||
2 | |||
3 | body://div[contains(@class,'contWarp')] | ||
4 | |||
5 | strip://div[contains(@class,'keyWord')] | ||
6 | strip://div[contains(@class,'submitComt')] | ||
7 | strip://div[contains(@class,'cmts')] | ||
8 | strip://div[contains(@class,'notice')] | ||
9 | strip://div[contains(@class,'part pt-second')] | ||
10 | test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt deleted file mode 100755 index e7cc4313..00000000 --- a/inc/3rdparty/site_config/standard/neh.gov.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | ||
2 | |||
3 | |||
4 | #meta data | ||
5 | title:substring-after(substring-after(//title,':'),':') | ||
6 | author:substring-after(//h2[@class = 'subHead'],'By') | ||
7 | date:substring-before(substring-after(//title,':'),':') | ||
8 | |||
9 | #img and caption handling | ||
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | ||
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | ||
12 | |||
13 | # clean up | ||
14 | strip: //table[@class = 'marginpaddingTop'] | ||
15 | strip: //h2[@class = 'subHead'] | ||
16 | |||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/neomoney.co.txt b/inc/3rdparty/site_config/standard/neomoney.co.txt deleted file mode 100755 index 2089fc39..00000000 --- a/inc/3rdparty/site_config/standard/neomoney.co.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //*[@class="header_title"]/h1 | ||
2 | body: //div[contains(@class, 'content')] | ||
3 | test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/net-security.org.txt b/inc/3rdparty/site_config/standard/net-security.org.txt deleted file mode 100755 index b7fedbf3..00000000 --- a/inc/3rdparty/site_config/standard/net-security.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='content-title'] | ||
2 | #date: substring-after(//div[@class='dernek-text-under'],'Posted on') | ||
3 | body: //div[@class='content-item'] | ||
4 | next_page_link: //li[@class='next']/a | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/netmagazine.com.txt b/inc/3rdparty/site_config/standard/netmagazine.com.txt deleted file mode 100755 index dcea047c..00000000 --- a/inc/3rdparty/site_config/standard/netmagazine.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@class="submitted"]/span | ||
3 | |||
4 | # seems like this should work, but nothing is returned. Issue with xpath parser? | ||
5 | date: //div[@class="submitted"]/time | ||
6 | |||
7 | body: //div[@id="main-content"] | ||
8 | |||
9 | strip_comments: no | ||
10 | |||
11 | strip: //h1 | ||
12 | strip: //div[@class="submitted"] | ||
13 | strip: //dd[@class="profile-avatar"] | ||
14 | strip: //div[@class="author-profile"]/dl/dt[1] | ||
15 | strip: //div[@id="right-col"] | ||
16 | test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/netzpolitik.org.txt b/inc/3rdparty/site_config/standard/netzpolitik.org.txt deleted file mode 100755 index 7fa43fd7..00000000 --- a/inc/3rdparty/site_config/standard/netzpolitik.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: //a[@ref='author'] | ||
3 | date: //span[@class='entry-date'] | ||
4 | body: //div[@class='entry-content'] | ||
5 | |||
6 | test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newleftproject.org.txt b/inc/3rdparty/site_config/standard/newleftproject.org.txt deleted file mode 100755 index d9af99d8..00000000 --- a/inc/3rdparty/site_config/standard/newleftproject.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //div[contains(@class, 'article_header')]//h3 | ||
2 | |||
3 | test_url: http://www.newleftproject.org/index.php/site/article_comments/do_we_need_a_facebook_of_the_left \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newmatilda.com.txt b/inc/3rdparty/site_config/standard/newmatilda.com.txt deleted file mode 100755 index f17ecdc6..00000000 --- a/inc/3rdparty/site_config/standard/newmatilda.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id="maincontent"]/h1 | ||
2 | body: //div[@id="maincontent"] | ||
3 | date: //div[@id="maincontent"]/p[2] | ||
4 | author: //ul[@id="contributors"]/li/p/b | ||
5 | |||
6 | strip: //p[@*] | ||
7 | strip: //h1 | ||
8 | strip: //div[@id="maincontent"]/div | ||
9 | test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newrepublic.com.txt b/inc/3rdparty/site_config/standard/newrepublic.com.txt deleted file mode 100755 index 039f0385..00000000 --- a/inc/3rdparty/site_config/standard/newrepublic.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | author: //span[@class="authors"] | ||
2 | date: //span[@class="date"] | ||
3 | body: //div[@class="primary"] | ||
4 | |||
5 | strip: //div[@id="controls"] | ||
6 | strip: //div[@id="read-next"] | ||
7 | |||
8 | test_url: http://www.newrepublic.com/article/112731/moocs-will-online-education-ruin-university-experience \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news-gazette.com.txt b/inc/3rdparty/site_config/standard/news-gazette.com.txt deleted file mode 100755 index 2b352707..00000000 --- a/inc/3rdparty/site_config/standard/news-gazette.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@id="main-content"]//h2 | ||
2 | |||
3 | author: //div[@id="main-content"]//span[@class="authors"] | ||
4 | |||
5 | date: //div[@id="main-content"]//span[@class="timestamp"] | ||
6 | |||
7 | body: //div[@id="main-content"]//div[@class="content"] | ||
8 | test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.cnet.com.txt b/inc/3rdparty/site_config/standard/news.cnet.com.txt deleted file mode 100755 index 78af70f4..00000000 --- a/inc/3rdparty/site_config/standard/news.cnet.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #This should apply to *.cnet.com. Not just news.cnet.com. | ||
2 | title: //h1 | ||
3 | author: //img[@class="mugshot"]/@alt | ||
4 | strip: //h1 | ||
5 | strip_id_or_class: breadcrumb | ||
6 | strip: //p[@id="introP"] | ||
7 | strip: //div[@class="postByline"] | ||
8 | strip: //div[@class="editorBio"] | ||
9 | strip: //div[@class="inline-slideshow"] | ||
10 | strip: //div[@class="related"] | ||
11 | body: //div[@class="postBody txtWrap"] | ||
12 | test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.detik.com.txt b/inc/3rdparty/site_config/standard/news.detik.com.txt deleted file mode 100755 index 629bc917..00000000 --- a/inc/3rdparty/site_config/standard/news.detik.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title://div[@class="content_detail"]/h1 | ||
2 | |||
3 | author://div[@class="author"]/strong | ||
4 | |||
5 | date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB') | ||
6 | |||
7 | body://div[@class="text_detail"] | ||
8 | test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt deleted file mode 100755 index 5754d47a..00000000 --- a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='main'] | ||
2 | strip: //div[@id='sbs'] | ||
3 | strip: //div[@id='fsizeSwitch'] | ||
4 | strip: //div[@id='googleAd'] | ||
5 | strip: //div[@id='detailFoot'] | ||
6 | strip_image_src: counter?key | ||
7 | convert_double_br_tags: yes | ||
8 | |||
9 | test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt deleted file mode 100755 index 1df47314..00000000 --- a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h2[@class="lyt-hdg-02-04"] | ||
2 | |||
3 | author: //div[@class="lyt-namearea"]/a | ||
4 | |||
5 | date: //div[@class="lyt-namearea"]/text() | ||
6 | |||
7 | body: //div[@class="articleContent"] | ||
8 | |||
9 | strip: //div[@id="tab-aside"] | ||
10 | |||
11 | test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.orf.at.txt b/inc/3rdparty/site_config/standard/news.orf.at.txt deleted file mode 100755 index 3b1d3ccb..00000000 --- a/inc/3rdparty/site_config/standard/news.orf.at.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.rambler.ru.txt b/inc/3rdparty/site_config/standard/news.rambler.ru.txt deleted file mode 100755 index 1d547334..00000000 --- a/inc/3rdparty/site_config/standard/news.rambler.ru.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //article | ||
2 | title: //h1 | ||
3 | author: //span[@class='b-article-source-dropdown'] | ||
4 | strip: //span[@class='b-article-photo-incut__source'] | ||
5 | strip: //a[@class='b-read-more b-read-more_bottom'] | ||
6 | |||
7 | |||
8 | tidy:no | ||
9 | test_url: http://news.rambler.ru/12972208/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.techmeme.com.txt b/inc/3rdparty/site_config/standard/news.techmeme.com.txt deleted file mode 100755 index ba4db828..00000000 --- a/inc/3rdparty/site_config/standard/news.techmeme.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class='main']/div[@class='item'] | ||
2 | strip: //div[@class='right'] | ||
3 | |||
4 | test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.yahoo.com.txt b/inc/3rdparty/site_config/standard/news.yahoo.com.txt deleted file mode 100755 index fc1739c8..00000000 --- a/inc/3rdparty/site_config/standard/news.yahoo.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | title: //h1[@class='headline'] | ||
3 | author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn'] | ||
4 | date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title | ||
5 | body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')] | ||
6 | #strip: //cite/abbr | ||
7 | strip_id_or_class: action | ||
8 | strip_id_or_class: prefetch | ||
9 | tidy: no | ||
10 | prune: no | ||
11 | |||
12 | test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt deleted file mode 100755 index f7441d17..00000000 --- a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | strip_comments: no | ||
2 | strip: //a[. = 'reply'] | ||
3 | test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.zing.vn.txt b/inc/3rdparty/site_config/standard/news.zing.vn.txt deleted file mode 100755 index af81e90e..00000000 --- a/inc/3rdparty/site_config/standard/news.zing.vn.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body://div[@class="newsdetail_wrapper"] | ||
2 | strip://div[@class="more_news"] | ||
3 | test_url: http://news.zing.vn/xa-hoi/s-phat-nang-xe-may-di-duong-tren-cao-ha-noi/a280838.html#home_noibat1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news247.gr.txt b/inc/3rdparty/site_config/standard/news247.gr.txt deleted file mode 100755 index 87637bed..00000000 --- a/inc/3rdparty/site_config/standard/news247.gr.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | |||
3 | body: //img[@id='relPicsMainPic'] | //div[contains(@class, 'storyContent')] | ||
4 | |||
5 | test_url: http://news247.gr/eidiseis/katatheseis_fwtia_htan_apofasismenoi_akomh_kai_na_afairesoyn_zwes_an_thewrousan_oti_to_thuma_htan_antipalos_toys.2433351.html | ||
6 | test_url: http://news247.gr/?widget=rssfeed&view=feed&contentId=38291 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsbomb.gr.txt b/inc/3rdparty/site_config/standard/newsbomb.gr.txt deleted file mode 100755 index 5eb0ea46..00000000 --- a/inc/3rdparty/site_config/standard/newsbomb.gr.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | date: //meta[@name='og:article:published_time']/@value | ||
2 | |||
3 | body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] | ||
4 | |||
5 | strip_id_or_class: itemImageGallery | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsle.com.txt b/inc/3rdparty/site_config/standard/newsle.com.txt deleted file mode 100755 index e500ddcc..00000000 --- a/inc/3rdparty/site_config/standard/newsle.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | single_page_link: //iframe/@src | ||
2 | test_url: http://newsle.com/article/0/15831103/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt deleted file mode 100755 index 1a990319..00000000 --- a/inc/3rdparty/site_config/standard/newsmill.se.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] | ||
3 | author: //div[@class='byline']//a[contains(@href, '/user/')] | ||
4 | |||
5 | strip_id_or_class: facts | ||
6 | strip_id_or_class: articleBlogsHolder | ||
7 | strip_id_or_class: byline | ||
8 | |||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsunspun.org.txt b/inc/3rdparty/site_config/standard/newsunspun.org.txt deleted file mode 100755 index 247bbebb..00000000 --- a/inc/3rdparty/site_config/standard/newsunspun.org.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@class='right']//div[@class='articles'] | ||
2 | author: //div[@id='artinfo']//a[contains(@href, '/author/')] | ||
3 | strip: //div[@id='artinfo'] | ||
4 | strip: //table[//a[contains(@href, 'twitter.com')]] | ||
5 | strip_id_or_class: twitter | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsweek.com.txt b/inc/3rdparty/site_config/standard/newsweek.com.txt deleted file mode 100755 index 565648ba..00000000 --- a/inc/3rdparty/site_config/standard/newsweek.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class = 'article-body'] | ||
2 | title: //h1[@class = 'article-title'] | ||
3 | strip: //aside | ||
4 | |||
5 | test_url: http://www.newsweek.com/day-steve-mcqueen-met-his-new-nazi-neighbor-keith-moon-229741 | ||
6 | test_url: http://www.newsweek.com/2014/06/13/how-greylock-partners-finds-next-facebook-253329.html | ||
diff --git a/inc/3rdparty/site_config/standard/newswise.com.txt b/inc/3rdparty/site_config/standard/newswise.com.txt deleted file mode 100755 index 10120ea1..00000000 --- a/inc/3rdparty/site_config/standard/newswise.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | prune: no | ||
2 | tidy: no | ||
3 | |||
4 | title: //h1/a[2] | ||
5 | body: //div[@id="main"] | ||
6 | author: //span[@id="articlesource"] | ||
7 | date: //span[contains(@class, 'releasedate')] | ||
8 | |||
9 | strip: //div[@class="inst-logo"] | ||
10 | strip: //h1[1] | ||
11 | |||
12 | strip_id_or_class: addthis | ||
13 | strip_id_or_class: released | ||
14 | strip_id_or_class: skiptranslate | ||
15 | strip_id_or_class: flash | ||
16 | |||
17 | test_url: http://www.newswise.com/articles/first-heat-wave-of-season-puts-elderly-at-risk | ||
diff --git a/inc/3rdparty/site_config/standard/newyorker.com.txt b/inc/3rdparty/site_config/standard/newyorker.com.txt deleted file mode 100755 index 950324a3..00000000 --- a/inc/3rdparty/site_config/standard/newyorker.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@id='articlehed'] | //h2[@id="articleintro"] | ||
2 | body: //div[@id='articletext'] | ||
3 | |||
4 | strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | //div[@class="cartoon"] | ||
5 | |||
6 | date: //h4[@id='articleauthor']/span[@class='dd dds'] | ||
7 | date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published'] | ||
8 | |||
9 | single_page_link: //div[@class='paginationViewSinglePage']/a | ||
10 | test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html | ||
11 | test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/next-gen.biz.txt b/inc/3rdparty/site_config/standard/next-gen.biz.txt deleted file mode 100755 index b8d235db..00000000 --- a/inc/3rdparty/site_config/standard/next-gen.biz.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | # 2011-08-22 [carlo@...] initial version | ||
2 | # 2011-08-22 [carlo@...] removed comments & social links | ||
3 | |||
4 | tidy: no | ||
5 | |||
6 | single_page_link: //a[@class="single active"] | ||
7 | |||
8 | body: //div[@id="main"]//div[@class="content-region"]/article | ||
9 | author: //span[@class="author-name"] | ||
10 | date: //time/text() | ||
11 | |||
12 | strip_id_or_class: //aside[@id="related"] | ||
13 | strip: //footer | ||
14 | |||
15 | title: //h1 | ||
16 | test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nfl.com.txt b/inc/3rdparty/site_config/standard/nfl.com.txt deleted file mode 100755 index 956b288f..00000000 --- a/inc/3rdparty/site_config/standard/nfl.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # doesn't look like selecting an attribute value works? | ||
2 | # author: //meta[@id="authorName"]@value | ||
3 | |||
4 | author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ") | ||
5 | date: //abbr[@id="article-time"] | ||
6 | title: //div[@id="article-hdr"]/h1 | ||
7 | body: //div[@class="articleText"] | ||
8 | |||
9 | # strip miscellaneous teasers & etc | ||
10 | strip: //div[@class="removeformobile"] | ||
11 | test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt deleted file mode 100755 index 44a82a95..00000000 --- a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | next_page_link: //div[@class='nextpage_continue']/a | ||
2 | strip: //div[@class='nextpage_continue'] | ||
3 | strip_id_or_class: nextpage | ||
4 | title: //div[@class='article_title']//h1 | ||
5 | body: //div[@class='article_title']/.. | ||
6 | body: //div[@class='content'] | ||
7 | test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nhk.or.jp.txt b/inc/3rdparty/site_config/standard/nhk.or.jp.txt deleted file mode 100755 index 0a3bb913..00000000 --- a/inc/3rdparty/site_config/standard/nhk.or.jp.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@id = 'news_right'] | ||
2 | test_url: http://www.nhk.or.jp/news/html/20110309/t10014559982000.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt deleted file mode 100755 index f0e28afb..00000000 --- a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | body: //div[@id="main"] | ||
2 | title: //div[@id="main"]/h3 | ||
3 | |||
4 | # Remove ‘Review’ and ‘Wii’. | ||
5 | strip: //div[@class="badge"] | ||
6 | |||
7 | # Remove duplicate title and country flag. | ||
8 | strip: //h3 | ||
9 | |||
10 | # Commented out below are attempts to extract the author and date, which did not work. | ||
11 | # author: //p[@class="extra "]/a | ||
12 | # date: //p[@class="extra "]/span[@class="when"] | ||
13 | test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nojesguiden.se.txt b/inc/3rdparty/site_config/standard/nojesguiden.se.txt deleted file mode 100755 index b15f0612..00000000 --- a/inc/3rdparty/site_config/standard/nojesguiden.se.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: //span[@class='meta']/span[@class='username'] | ||
2 | body: //div[@class='article-content'] | ||
3 | |||
4 | strip_id_or_class: 'article-actions' | ||
5 | test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt deleted file mode 100755 index f698d98e..00000000 --- a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='pn-maincontent'] | ||
3 | strip_id_or_class: z-menu | ||
4 | strip_id_or_class: news_category | ||
5 | strip_id_or_class: news_title | ||
6 | strip_id_or_class: news_modify | ||
7 | strip_id_or_class: news_morearticlesincat | ||
8 | strip_id_or_class: ezc_comments | ||
9 | strip_comments: yes | ||
10 | |||
11 | test_url: http://www.northumberlandview.ca/index.php?module=news&type=user&func=display&sid=31127 | ||
diff --git a/inc/3rdparty/site_config/standard/nosalty.hu.txt b/inc/3rdparty/site_config/standard/nosalty.hu.txt deleted file mode 100755 index 7e20cadf..00000000 --- a/inc/3rdparty/site_config/standard/nosalty.hu.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@id='tab-recept']//h1 | ||
2 | body: //div[@id='tab-recept']//div[contains(@class, 'column-container')] | ||
3 | strip_id_or_class: ajanlo-box | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.nosalty.hu/recept/szupergyors-fank \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nplusonemag.com.txt b/inc/3rdparty/site_config/standard/nplusonemag.com.txt deleted file mode 100755 index 1b817c04..00000000 --- a/inc/3rdparty/site_config/standard/nplusonemag.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: /html/body/div[3]/div/div/h1 | ||
2 | |||
3 | body: //*[@id="article-body"] | ||
4 | |||
5 | |||
6 | test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt deleted file mode 100755 index acd73e48..00000000 --- a/inc/3rdparty/site_config/standard/npr.org.txt +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | title: //div[contains(@class, 'storytitle')]//h1 | ||
2 | author: //p[@class="byline"]/span | ||
3 | body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')] | ||
4 | date: //meta[@name="date"]/@content | ||
5 | |||
6 | strip_id_or_class: enlarge_measure | ||
7 | strip_id_or_class: enlarge_html | ||
8 | strip: //a[contains(@class, 'enlargeicon')] | ||
9 | strip: //div[contains(@class, 'bookedition')] | ||
10 | strip: //div[@class='textsize'] | ||
11 | strip: //ul[@class='genres'] | ||
12 | strip: //span[@class='bull'] | ||
13 | strip_id_or_class: secondary | ||
14 | strip_id_or_class: con1col | ||
15 | strip: //h3[@class='conheader'] | ||
16 | |||
17 | replace_string(<a name="more"> </a>): <!-- no more --> | ||
18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> | ||
19 | replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2> | ||
20 | |||
21 | prune: no | ||
22 | strip://div[@class="ecommercepop"] | ||
23 | strip://span[@class="bull"] | ||
24 | strip://span[@class="purchaseLink"] | ||
25 | strip://div[@class="enlarge_html"] | ||
26 | strip://div[@class="enlarge_measure"] | ||
27 | strip://div[@class="container con1col small"] | ||
28 | strip://a[contains(@class, "enlargebtn")] | ||
29 | strip://div[contains(@class, "bucketwrap internallink")] | ||
30 | |||
31 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates | ||
32 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right | ||
33 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres | ||
34 | test_url: http://www.npr.org/templates/story/story.php?storyId=229103221 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nybooks.com.txt b/inc/3rdparty/site_config/standard/nybooks.com.txt deleted file mode 100755 index d95ec68e..00000000 --- a/inc/3rdparty/site_config/standard/nybooks.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | strip_id_or_class: sIFR-alternate | ||
2 | title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2 | ||
3 | single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))] | ||
4 | |||
5 | body: //div[@id = 'article-body'] | ||
6 | strip_id_or_class:article-tools | ||
7 | strip_id_or_class:js_target | ||
8 | strip_id_or_class:marker | ||
9 | author://div[@id = 'page-title']/h3 | ||
10 | date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')] | ||
11 | |||
12 | |||
13 | test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nymag.com.txt b/inc/3rdparty/site_config/standard/nymag.com.txt deleted file mode 100755 index 7a1d62d9..00000000 --- a/inc/3rdparty/site_config/standard/nymag.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h2[contains(@class, 'primary')] | ||
2 | body: //div[@id='story'] | ||
3 | author: //*[@class='by']/a | ||
4 | date: substring-after(//*[@class='date'], 'Published') | ||
5 | |||
6 | next_page_link: //div[@class='page-navigation']//li[@class='next']/a | ||
7 | |||
8 | test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nyteknik.se.txt b/inc/3rdparty/site_config/standard/nyteknik.se.txt deleted file mode 100755 index f4bedb6a..00000000 --- a/inc/3rdparty/site_config/standard/nyteknik.se.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@class="article default-article"]/h1 | ||
2 | author: //p[@class="author"]/a[2] | ||
3 | |||
4 | # Article introduction: | ||
5 | #move_into(//div[@class="article-bread"]): //p[@class="lead"] | ||
6 | |||
7 | body: //div[@class="article-bread"] | ||
8 | test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt deleted file mode 100755 index 54735ec7..00000000 --- a/inc/3rdparty/site_config/standard/nytimes.com.txt +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | title://h1[@class="articleHeadline"] | ||
2 | body://div[@id="article"] | ||
3 | body://*[@itemprop="articleBody"] | ||
4 | strip_id_or_class:articleTools | ||
5 | strip_id_or_class:readerscomment | ||
6 | #strip://div[contains(@class, "articleInline runaroundLeft")] | ||
7 | strip: //div[contains(@class, "doubleRule")] | ||
8 | # strip image credit - appears as a bold heading | ||
9 | strip: //div[contains(@class, "articleInline")]//h6 | ||
10 | strip_id_or_class:enlargeThis | ||
11 | strip_id_or_class:pageLinks | ||
12 | strip_id_or_class:memberTools | ||
13 | strip_id_or_class:articleExtras | ||
14 | strip_id_or_class:singleAd | ||
15 | strip_id_or_class:byline | ||
16 | strip_id_or_class:dateline | ||
17 | strip_id_or_class:articleheadline | ||
18 | strip_id_or_class:articleBottomExtra | ||
19 | strip_id_or_class:shareTools | ||
20 | strip://a[contains(@href, 'nytimes.com/adx/')] | ||
21 | strip: //nyt_byline | ||
22 | strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] | ||
23 | strip: //p[@class='caption']//a[contains(., 'More Photos')] | ||
24 | |||
25 | prune: no | ||
26 | tidy: no | ||
27 | |||
28 | find_string: <script | ||
29 | replace_string: <div style="display:none" | ||
30 | find_string: </script> | ||
31 | replace_string: </div> | ||
32 | |||
33 | date: substring-after(//*[contains(@class, 'dateline')], 'Published:') | ||
34 | |||
35 | single_page_link: //link[contains(@href, 'pagewanted=all')] | ||
36 | single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href | ||
37 | single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') | ||
38 | #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] | ||
39 | |||
40 | strip://ul[@id = 'toolsList'] | ||
41 | strip://h6[@class = 'kicker'] | ||
42 | author:substring-after(//h6[@class='byline'],'By ') | ||
43 | |||
44 | test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html | ||
45 | test_contains: In this column I want to look at a not uncommon way of writing | ||
46 | |||
47 | test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html | ||
48 | test_contains: IF you’ve seen enough of Aaron Sorkin’s theater | ||
49 | |||
50 | test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html | ||
51 | test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html | ||
52 | test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html | ||
53 | test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html | ||
diff --git a/inc/3rdparty/site_config/standard/nzz.ch.txt b/inc/3rdparty/site_config/standard/nzz.ch.txt deleted file mode 100755 index 749f4f2a..00000000 --- a/inc/3rdparty/site_config/standard/nzz.ch.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //*[@class='article-full'] | ||
2 | title: //h3 | ||
3 | strip: //header[@class='group'] | ||
4 | #body: //p[@class='lead'] | ||
5 | #move_into(//p[@class='lead']): //*[@class='article-full']/figure | ||
6 | #move_into(//p[@class='lead']): //div[@id='articleBodyText'] | ||
7 | strip: //div[@id='social-media-floater'] | ||
8 | strip: //div[@class='advertisement'] | ||
9 | strip: //div[@class='infobox'] | ||
10 | strip: //div[@id='articleComments'] | ||
11 | |||
12 | test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/observer.com.txt b/inc/3rdparty/site_config/standard/observer.com.txt deleted file mode 100755 index 0b107538..00000000 --- a/inc/3rdparty/site_config/standard/observer.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //article[contains(@class, 'instapaper_body')] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | single_page_link: //a[@id='print-button'] | ||
6 | |||
7 | test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/off.net.mk.txt b/inc/3rdparty/site_config/standard/off.net.mk.txt deleted file mode 100755 index bf107876..00000000 --- a/inc/3rdparty/site_config/standard/off.net.mk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[(@id = "content")] | ||
2 | strip: //div[(@class = "links-bar")] | ||
3 | strip: //div[(@class = "povrzani")] | ||
4 | strip: //div[(@class = "povrzani-dolu")] | ||
5 | strip: //div[(@class = "tags")] | ||
6 | strip: //h1[(@id = "page-title")] | ||
7 | test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/omaha.com.txt b/inc/3rdparty/site_config/standard/omaha.com.txt deleted file mode 100755 index 53db061d..00000000 --- a/inc/3rdparty/site_config/standard/omaha.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='story'] | ||
2 | test_url: http://www.omaha.com/article/20111031/BIGRED/111039984#pelini-tremendous-challenge-ahead-for-huskers \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/omiliya.org.txt b/inc/3rdparty/site_config/standard/omiliya.org.txt deleted file mode 100755 index 4b3a7202..00000000 --- a/inc/3rdparty/site_config/standard/omiliya.org.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='squeeze']/h1 | ||
2 | strip: //div[@id='squeeze']/h1 | ||
3 | author: //div[@class='submitted']/a | ||
4 | strip: //div[@class='submitted']/a | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | |||
8 | |||
9 | test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/on.net.mk.txt b/inc/3rdparty/site_config/standard/on.net.mk.txt deleted file mode 100755 index a95c2b0f..00000000 --- a/inc/3rdparty/site_config/standard/on.net.mk.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[(@class = "statija")] | ||
2 | strip: //div[(@class = "relatedBlock")] | ||
3 | strip: //div[(@class = "swftools")] | ||
4 | strip: //table[(@class = "links")] | ||
5 | test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt deleted file mode 100755 index 448bb7e1..00000000 --- a/inc/3rdparty/site_config/standard/online.wsj.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='article_story_body'] | ||
3 | |||
4 | author: //h3[@class='byline']/a | ||
5 | # for slide show content | ||
6 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | ||
7 | date: //li[@class='dateStamp']/small | ||
8 | |||
9 | strip_id_or_class: insetFullBracket | ||
10 | strip_id_or_class: insettipBox | ||
11 | #strip_id_or_class: legacyInset | ||
12 | strip_id_or_class: recipeACShopAndBuyText | ||
13 | |||
14 | strip: //div[contains(@class, 'insetContent')]//cite | ||
15 | strip: //*[contains(@style, 'visibility: hidden;')] | ||
16 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | ||
17 | strip: //div[contains(@class, 'carousel')] | ||
18 | |||
19 | prune: no | ||
20 | tidy: no | ||
21 | |||
22 | test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342 | ||
23 | test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html | ||
24 | # slide show | ||
25 | test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html | ||
diff --git a/inc/3rdparty/site_config/standard/onlinewelten.com.txt b/inc/3rdparty/site_config/standard/onlinewelten.com.txt deleted file mode 100755 index 1609fa83..00000000 --- a/inc/3rdparty/site_config/standard/onlinewelten.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@id='news_detail']//div[@class='contents clearfix'] | ||
2 | test_url: http://www.onlinewelten.com/games/aliens-colonial-marines/news/offizielle-spiel-ankuendigung-nintendos-wii-u-103690/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/onstartups.com.txt b/inc/3rdparty/site_config/standard/onstartups.com.txt deleted file mode 100755 index cccce8cd..00000000 --- a/inc/3rdparty/site_config/standard/onstartups.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip: //div[@id="dnn_LeftPane"] | //div[@id="dnn_ContentPane"]//h1 | //div[@id="dnn_ContentPane"]//p[@class="Normal"] | //div[@class="Submissions"] | //div[@id="listing"]//h3 | //div[@id="listing"][2] | //div[@id="emart-fail"] | //div[@id="emart-success"] | //div[@id="emart-form"] | ||
2 | test_url: http://onstartups.com/tabid/3339/bid/37737/Secrets-Of-Freemium-Pricing-Make-The-Cheapskates-Pay.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt deleted file mode 100755 index a9bf71ef..00000000 --- a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | |||
3 | author: //a[@rel='author'] | ||
4 | |||
5 | date: substring-before(//aside[@class='entry-meta'], '|') | ||
6 | |||
7 | body: //div[@class='entry-content'] | ||
8 | test_url: http://ontologicalgeek.com/change-or-live-final-fantasy-x-as-catholic-dystopia/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/opensource.org.txt b/inc/3rdparty/site_config/standard/opensource.org.txt deleted file mode 100755 index 2bd3ccdb..00000000 --- a/inc/3rdparty/site_config/standard/opensource.org.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='content clear-block'] | ||
2 | test_url: http://opensource.org/node/537 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/openthemagazine.com.txt b/inc/3rdparty/site_config/standard/openthemagazine.com.txt deleted file mode 100755 index 6913eb0e..00000000 --- a/inc/3rdparty/site_config/standard/openthemagazine.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id = 'content-inner'] | ||
2 | strip: //div[@id = 'content-bottom'] | ||
3 | strip_id_or_class: print_sharebutton | ||
4 | test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/openwebx.org.txt b/inc/3rdparty/site_config/standard/openwebx.org.txt deleted file mode 100755 index a5dcdb59..00000000 --- a/inc/3rdparty/site_config/standard/openwebx.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class="chapter"] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | test_url: http://openwebx.org/docs/springext.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/orf.at.txt b/inc/3rdparty/site_config/standard/orf.at.txt deleted file mode 100755 index fb4f2181..00000000 --- a/inc/3rdparty/site_config/standard/orf.at.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://orf.at/stories/2084731/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/origo.hu.txt b/inc/3rdparty/site_config/standard/origo.hu.txt deleted file mode 100755 index 50717f25..00000000 --- a/inc/3rdparty/site_config/standard/origo.hu.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: /html/body/div[5]/div[2]/h1 | ||
2 | body: /html/body/div[5]/div[2]/div[6]/div/div | ||
3 | body: //*[@id="cikk"] | ||
4 | strip: /html/body/div[5]/div[2]/h1 | ||
5 | strip: /html/body/div[5]/div[2]/div[4] | ||
6 | strip: //*[@id="multidoboz"] | ||
7 | strip: /html/body/div[5]/div[2]/div[6]/div[2] | ||
8 | strip: //*[@id="comments"] | ||
9 | strip: //*[@id="rating-doboz"] | ||
10 | strip: /html/body/div[5]/div[2]/div[10] | ||
11 | strip: /html/body/div[5]/div[2]/a | ||
12 | strip: /html/body/div[5]/div[2]/span | ||
13 | strip: /html/body/div[5]/div[2]/span[2] | ||
14 | strip: /html/body/div[5]/div[2]/span[3] | ||
15 | strip: /html/body/div[5]/div[2]/span[4] | ||
16 | strip: /html/body/div[5]/div[2]/span[5] | ||
17 | strip: //*[@id="kommentszam"] | ||
18 | test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/oschina.net.txt b/inc/3rdparty/site_config/standard/oschina.net.txt deleted file mode 100755 index 56451539..00000000 --- a/inc/3rdparty/site_config/standard/oschina.net.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1 | ||
2 | strip_id_or_class: syntaxhighlighter | ||
3 | test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt deleted file mode 100755 index 7e2985e0..00000000 --- a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pakmedia.tv.txt b/inc/3rdparty/site_config/standard/pakmedia.tv.txt deleted file mode 100755 index 5d6e4c8c..00000000 --- a/inc/3rdparty/site_config/standard/pakmedia.tv.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | body: //article//div[@class='entry'] | ||
3 | strip_id_or_class: addthis | ||
4 | strip_id_or_class: gdsrcacheloader | ||
5 | strip_id_or_class: entry-meta | ||
6 | strip_id_or_class: entry-tags | ||
7 | strip_id_or_class: authorbox | ||
8 | strip: //div[@class='entry']/p[1] | ||
9 | strip: //img[@width='600' and @height='70'] | ||
10 | # related posts | ||
11 | strip: //h3[contains(., 'Related posts')] | ||
12 | strip: //div[contains(@style, 'border: 0pt none ; margin: 0pt; padding: 0pt;')] | ||
13 | |||
14 | prune: no | ||
15 | tidy: no | ||
16 | |||
17 | test_url: http://pakmedia.tv/tv-one/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pandagon.net.txt b/inc/3rdparty/site_config/standard/pandagon.net.txt deleted file mode 100755 index 35121e14..00000000 --- a/inc/3rdparty/site_config/standard/pandagon.net.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title://h2 | ||
2 | author://div[@class="posted"]/a | ||
3 | date://div[@class="date"] | ||
4 | body://div[@class="entry"] | ||
5 | test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pandodaily.com.txt b/inc/3rdparty/site_config/standard/pandodaily.com.txt deleted file mode 100755 index a5d427af..00000000 --- a/inc/3rdparty/site_config/standard/pandodaily.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | tidy: no | ||
2 | body: //article | ||
3 | date: //time/@datetime | ||
4 | strip_id_or_class: sharedaddy | ||
5 | test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/panic.com.txt b/inc/3rdparty/site_config/standard/panic.com.txt deleted file mode 100755 index e0e2595c..00000000 --- a/inc/3rdparty/site_config/standard/panic.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | date: //h3[@class='postDate'] | ||
3 | test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt deleted file mode 100755 index 2c522da4..00000000 --- a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2[@class="page_title"] | ||
2 | body: //div[@class="entry arquivo"] | ||
3 | author: //span[@class="author"] | ||
4 | footnotes: yes | ||
5 | prune: yes | ||
6 | test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/parislemon.com.txt b/inc/3rdparty/site_config/standard/parislemon.com.txt deleted file mode 100755 index cd9bd55d..00000000 --- a/inc/3rdparty/site_config/standard/parislemon.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2[@class="post-title"] | ||
2 | author: substring-after(//div[@class="description"],'Words by ') | ||
3 | date: //li[@class="date"] | ||
4 | strip: //h2[@class="post-title"] | ||
5 | body: //div[@class="copy"] | ||
6 | test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/parliament.uk.txt b/inc/3rdparty/site_config/standard/parliament.uk.txt deleted file mode 100755 index caaa2e94..00000000 --- a/inc/3rdparty/site_config/standard/parliament.uk.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='news-article'] | ||
3 | test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pastebin.com.txt b/inc/3rdparty/site_config/standard/pastebin.com.txt deleted file mode 100755 index 03b67b7e..00000000 --- a/inc/3rdparty/site_config/standard/pastebin.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title://div[@class="paste_box_line1"]/h1 | ||
2 | author://div[@class="paste_box_line2"]/a | ||
3 | body://div[@class="text"] | ||
4 | date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') | ||
5 | dissolve://li | ||
6 | test_url: http://pastebin.com/LAykd1es \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt deleted file mode 100755 index c535158d..00000000 --- a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='ff-pastepad-content'] | ||
3 | prune: no | ||
4 | # todo: add test file | ||
5 | test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pathawks.com.txt b/inc/3rdparty/site_config/standard/pathawks.com.txt deleted file mode 100755 index 25042224..00000000 --- a/inc/3rdparty/site_config/standard/pathawks.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title://*[contains(@class,'post-title')] | ||
2 | body://div[contains(@class,'post-body')] | ||
3 | body://div[contains(@class,'entry-content')] | ||
4 | strip_comments:no | ||
5 | prune:no | ||
6 | convert_double_br_tags:yes | ||
7 | tidy:yes | ||
8 | test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcast.me.txt b/inc/3rdparty/site_config/standard/pcast.me.txt deleted file mode 100755 index ae38e8e1..00000000 --- a/inc/3rdparty/site_config/standard/pcast.me.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | prune: no | ||
2 | test_url: http://pcast.me/shownotes/get/16t \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcmag.com.txt b/inc/3rdparty/site_config/standard/pcmag.com.txt deleted file mode 100755 index 96bdd95a..00000000 --- a/inc/3rdparty/site_config/standard/pcmag.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | prune:yes | ||
2 | |||
3 | date://*[contains(@class,'date')] | ||
4 | |||
5 | body://div[contains(@id,'content')] | ||
6 | |||
7 | next_page_link://a[contains(.,'Next >')] | ||
8 | |||
9 | strip_id_or_class:sponsors | ||
10 | test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt deleted file mode 100755 index 7193f87e..00000000 --- a/inc/3rdparty/site_config/standard/pcworld.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //div[@class='articleHead']//h1 | ||
2 | author: //div[@class="author-name"]/a[1] | ||
3 | body: //div[@class="main"] | ||
4 | |||
5 | # remove 'From the Lab' and 'Recent posts' text | ||
6 | strip: //div[@class='blogLabel'] | ||
7 | |||
8 | # remove byline and meta info | ||
9 | strip: //h1 | ||
10 | strip: //div[@class="article-meta"] | ||
11 | strip: //div[@class="author-info"] | ||
12 | |||
13 | #strip tags and categories | ||
14 | strip: //div[@class="department"] | ||
15 | |||
16 | #strip product cap links | ||
17 | strip: //div[@class="cap-main"] | ||
18 | strip: //div[@id="compare-lede"] | ||
19 | test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/penny-arcade.com.txt b/inc/3rdparty/site_config/standard/penny-arcade.com.txt deleted file mode 100755 index a0d5099e..00000000 --- a/inc/3rdparty/site_config/standard/penny-arcade.com.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | # 2012-01-14 carlo@... - fixed title, body; added author, date | ||
2 | |||
3 | title: //div[@class="title"]/h2/a | ||
4 | # body: //div[@class="post"] | ||
5 | # author: //p[@class="iconEmail"]/a | ||
6 | # date: //p[@class="iconDate"] | ||
7 | |||
8 | # 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report | ||
9 | |||
10 | # Penny Arcade | ||
11 | |||
12 | author: //li[@class="iconEmail"]/a | ||
13 | date: //li[@class="iconDate"] | ||
14 | body: //div[@class="body"] | ||
15 | |||
16 | # PA Report | ||
17 | |||
18 | author: //div[@class="meta"]/p/a | ||
19 | date: substring-after(//div[@class="meta"]/p, '/ ') | ||
20 | title: substring-after(//title, '- ') | ||
21 | |||
22 | test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news | ||
23 | test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pentaxforums.com.txt b/inc/3rdparty/site_config/standard/pentaxforums.com.txt deleted file mode 100755 index 00f61a48..00000000 --- a/inc/3rdparty/site_config/standard/pentaxforums.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | next_page_link: //a[contains(., 'Next:')] | ||
2 | test_url: http://www.pentaxforums.com/reviews/long-exposure-handhelds/introduction.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt deleted file mode 100755 index 5ba5f772..00000000 --- a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | prune: no | ||
2 | tidy: no | ||
3 | body: //div[@class='article-content'] | ||
4 | dissolve: //nobr/a | ||
5 | dissolve: //nobr | ||
6 | test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/philly.com.txt b/inc/3rdparty/site_config/standard/philly.com.txt deleted file mode 100755 index accbd60b..00000000 --- a/inc/3rdparty/site_config/standard/philly.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: //p[@class='byline']/span | ||
3 | body: //*[@id='body-content'] | ||
4 | date: //div[@class='article_timestamp']/span | ||
5 | |||
6 | strip: //*[@class='b-group'] | ||
7 | strip: //*[contains(@style, 'none')] | ||
8 | strip: //a[contains(@href, 'comments')] | ||
9 | strip: //*[contains(@class, 'comment')] | ||
10 | test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt deleted file mode 100755 index 7f7e3830..00000000 --- a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | author: substring-before(//div[@class='post_meta'],' on') | ||
2 | date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on') | ||
3 | title: //h1[@class='post_title'] | ||
4 | body: //div[@class='article'] | ||
5 | |||
6 | test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/php.net.txt b/inc/3rdparty/site_config/standard/php.net.txt deleted file mode 100755 index cc643f05..00000000 --- a/inc/3rdparty/site_config/standard/php.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | strip_id_or_class: manualnavbar | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/physicstoday.org.txt b/inc/3rdparty/site_config/standard/physicstoday.org.txt deleted file mode 100755 index 624055b7..00000000 --- a/inc/3rdparty/site_config/standard/physicstoday.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@class='abstitle']//h1 | ||
2 | author: //div[@class='authorList'] | ||
3 | body: //div[@id='fulltext_body'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pinterest.com.txt b/inc/3rdparty/site_config/standard/pinterest.com.txt deleted file mode 100755 index 01b6df41..00000000 --- a/inc/3rdparty/site_config/standard/pinterest.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //title | ||
2 | body: //div[contains(@class, 'imageContainer')] | ||
3 | |||
4 | test_url: http://pinterest.com/pin/380906080954441188/ | ||
5 | test_url: http://pinterest.com/michaelsorm/architecture/rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pitchfork.com.txt b/inc/3rdparty/site_config/standard/pitchfork.com.txt deleted file mode 100755 index eee96a9c..00000000 --- a/inc/3rdparty/site_config/standard/pitchfork.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title:concat(//h1,' - ',//h2,' - ',//h3) | ||
2 | author://address | ||
3 | date://span[@class='pub-date'] | ||
4 | body://div[@id='main'] | ||
5 | single_page_link://link[@rel='canonical'] | ||
6 | strip://div[@class='info'] | ||
7 | strip_id_or_class:'object-grid related-content' | ||
8 | strip_id_or_class:'object-prevnext' | ||
9 | strip_id_or_class:'object-header' | ||
10 | strip_id_or_class:'source' | ||
11 | strip_id_or_class:'label' | ||
12 | strip_id_or_class:'title' | ||
13 | dissolve://ul | ||
14 | strip://li[@class='next'] | ||
15 | strip://li[@class='prev'] | ||
16 | test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittnews.com.txt b/inc/3rdparty/site_config/standard/pittnews.com.txt deleted file mode 100755 index c302526d..00000000 --- a/inc/3rdparty/site_config/standard/pittnews.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h2[@class='post-title'] | ||
2 | author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/') | ||
3 | date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in') | ||
4 | strip: //h2[@class='post-title'] | ||
5 | strip: //p[@class='post-details'] | ||
6 | strip: //h3[@class='post-byline'] | ||
7 | body: //div[@id='content'] | ||
8 | test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt deleted file mode 100755 index f2948528..00000000 --- a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: substring-before(//title,'pirates.com') | ||
2 | date: //span[@class='timeStamp'] | ||
3 | author: substring-before(substring-after(//div[@class='byLine'],'By'),'/') | ||
4 | body: //div[@id='article'] | ||
5 | #strip: //div[@class='inner'] | ||
6 | strip: //div[@id='article_head'] | ||
7 | strip: //p[@class='tagLine'] | ||
8 | strip: //div[@id='article_related_links'] | ||
9 | strip: //div[@id='article_related_mlb'] | ||
10 | strip: //div[@id='article_related_club'] | ||
11 | strip: //span[@class='more'] | ||
12 | strip: //div[@class='article_component'] | ||
13 | strip: //span[@class='screen_reader'] | ||
14 | strip: //ul[@class='columnists_blurb'] | ||
15 | test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt deleted file mode 100755 index cc7891f3..00000000 --- a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: substring-before(//title,'- Pittsburgh Tribune') | ||
2 | author: substring-before(substring-after(//div[@class='byline'],'By '),',') | ||
3 | date: substring-after(substring-after(//div[@class='byline'],','),',') | ||
4 | body: //div[@id='storyBody'] | ||
5 | strip: //div[@class='morestories'] | ||
6 | dissolve: //p[@class='subheader'] | ||
7 | test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt deleted file mode 100755 index 4d02f6bb..00000000 --- a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //title | ||
2 | author: substring-after(//div[@class='by-line'],'BY') | ||
3 | |||
4 | body: //div[@id='article-body'] | ||
5 | |||
6 | strip: //div[@class='by-line'] | ||
7 | strip: //div[@id='article-body']/h1 | ||
8 | test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt deleted file mode 100755 index c372284a..00000000 --- a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //span[@class='StoryHeadline'] | ||
2 | strip: //div[@class='fivevert'] | ||
3 | body: //div[@id='Content'] | ||
4 | test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt deleted file mode 100755 index 571874a4..00000000 --- a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class='articletitle'] | ||
2 | author: substring-after(//span[@class='author'],'by') | ||
3 | date: //span[@class='created'] | ||
4 | body: //div[@class='article'] | ||
5 | strip: //div[@class='headline'] | ||
6 | strip: //p[@class='articleinfo'] | ||
7 | #dissolve: //p[@class='subheader'] | ||
8 | test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/planetvita.de.txt b/inc/3rdparty/site_config/standard/planetvita.de.txt deleted file mode 100755 index bfc3342d..00000000 --- a/inc/3rdparty/site_config/standard/planetvita.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@id='frnRahmen']/div/div[@id='content']/div[2]/h2 | ||
2 | author: //div[@id='content']/div[1]/div/a | ||
3 | body: //div[@id='content']/div[2]/span | ||
4 | strip: //div[@id='commenthead'] | ||
5 | test_url: http://www.planetvita.de/news/10389-psn-store-update-vom-03-april-neue-inhalte-fuer-psvita.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/playboy.com.txt b/inc/3rdparty/site_config/standard/playboy.com.txt deleted file mode 100755 index 92834947..00000000 --- a/inc/3rdparty/site_config/standard/playboy.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | author: //article//*[@class="author"] | ||
2 | date: //article//*[@class="publication-date"] | ||
3 | body: //article | ||
4 | strip: //article/header | ||
5 | strip: //article/section | ||
6 | test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/plus.google.com.txt b/inc/3rdparty/site_config/standard/plus.google.com.txt deleted file mode 100755 index 4a7ea126..00000000 --- a/inc/3rdparty/site_config/standard/plus.google.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | body: //div[@id='contentPane']//div[@class='vg'] | ||
2 | body: //div[@id='contentPane'] | ||
3 | |||
4 | # Grab the author by finding the first profile photo, then stepping up to its parent <a> tag and reading that tag's title attribute, which should hold the author's name. Untested: the parser currently errors on this site (thanks, Google). | ||
5 | |||
6 | author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title | ||
7 | |||
8 | |||
9 | strip: //*[@title="People who +1'd this"]/../.. | ||
10 | strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')] | ||
11 | strip: //*[@role='menu'] | ||
12 | strip: //img[contains(@alt, 'profile photo')] | ||
13 | strip: //*[@class='a-f-i-Ad'] | ||
14 | |||
15 | tidy: no | ||
16 | |||
17 | test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt deleted file mode 100755 index ec151b42..00000000 --- a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h2[@class='jcw-pagetitle'] | ||
2 | date: //p[@class='postinfo'] | ||
3 | body: //div[@class='contenttext'] | ||
4 | test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt deleted file mode 100755 index 65ddba54..00000000 --- a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id="content"]/div[1] | ||
2 | |||
3 | title: //h1[@class="entry-title"] | ||
4 | test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt deleted file mode 100755 index d8f5e575..00000000 --- a/inc/3rdparty/site_config/standard/politico.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title://div[contains(@class, "article")]/h1 | ||
2 | body://div[contains(@class,"story-text")] | ||
3 | |||
4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] | ||
5 | |||
6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a | ||
7 | date://meta[@name="publish_date"]/@content | ||
8 | |||
9 | strip://div[contains(@class, "breadcrumbs")] | ||
10 | strip://a[contains(@class, "hidden")] | ||
11 | strip://div[contains(@class, "story-embed")] | ||
12 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. | ||
13 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politifact.com.txt b/inc/3rdparty/site_config/standard/politifact.com.txt deleted file mode 100755 index 65a8fc57..00000000 --- a/inc/3rdparty/site_config/standard/politifact.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id="content"] | ||
2 | |||
3 | strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"] | ||
4 | test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politiken.dk.txt b/inc/3rdparty/site_config/standard/politiken.dk.txt deleted file mode 100755 index b13f8f87..00000000 --- a/inc/3rdparty/site_config/standard/politiken.dk.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | # 21/10-2011: | ||
2 | # Added Author+Date | ||
3 | # Remove fakta-boks if found | ||
4 | # Deleted 'Læs også...' filter | ||
5 | # - Change in markup caused it to strip too much. | ||
6 | |||
7 | author://span[@class='autor-name'] | ||
8 | date:substring-after(//div[@class='art-created'], ' ') | ||
9 | title: //h1[contains(@class, 'stor-type')] | ||
10 | body: //div[@id='art-body'] | ||
11 | strip: //div[@class='art-fakta article-box'] | ||
12 | |||
13 | test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/polygon.com.txt b/inc/3rdparty/site_config/standard/polygon.com.txt deleted file mode 100755 index 8fe9b1be..00000000 --- a/inc/3rdparty/site_config/standard/polygon.com.txt +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | body: //div[@id='article-content'] | ||
2 | body: //article[@id='entry-top']/div[@class='float_wrapper'] | ||
3 | author: //header/p[@class='byline']/em/a | ||
4 | date: //header/p[@class='byline']/span[@class='timestamp'] | ||
5 | |||
6 | strip: //div[@id='article-content']//header | ||
7 | strip: //label | ||
8 | |||
9 | #photos on left column (delete all) | ||
10 | strip: //div[@class='big_photo'] | ||
11 | |||
12 | #photos on left column (remove extras used for scroll effect) | ||
13 | #strip: //div[@class='big_photo']/div[./img] | ||
14 | #strip: //div[@class='big_photo']/img[position()>1] | ||
15 | |||
16 | strip_id_or_class: vox-lazy-load | ||
17 | strip_id_or_class: social_buttons | ||
18 | strip_id_or_class: feature_toc | ||
19 | |||
20 | prune: no | ||
21 | |||
22 | find_string: <noscript> | ||
23 | replace_string: <div> | ||
24 | find_string: </noscript> | ||
25 | replace_string: </div> | ||
26 | |||
27 | #find_string: <script | ||
28 | #replace_string: <div style="display:none" | ||
29 | #find_string: </script> | ||
30 | #replace_string: </div> | ||
31 | |||
32 | strip: //div[@class='float_wrapper']/header | ||
33 | test_url: http://www.polygon.com/2013/4/5/4189028/donkey-kong-country-returns-3d-new-content | ||
34 | test_url: http://www.polygon.com/features/2013/8/22/4602568/30-years-xbox-360-playstation-3-wii \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/popularmechanics.com.txt b/inc/3rdparty/site_config/standard/popularmechanics.com.txt deleted file mode 100755 index 2582e6fb..00000000 --- a/inc/3rdparty/site_config/standard/popularmechanics.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | next_page_link: //div[@id='longPagination']/a[@class='next'] | ||
2 | |||
3 | title: //div[@id='contentHeader']//h1 | ||
4 | |||
5 | body: //div[@id='articleBody'] | ||
6 | # this is so sad | ||
7 | body: //div[@id='intelliTXT'] | ||
8 | test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/portertech.ca.txt b/inc/3rdparty/site_config/standard/portertech.ca.txt deleted file mode 100755 index 2897cb57..00000000 --- a/inc/3rdparty/site_config/standard/portertech.ca.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: //*[(@class = "author")] | ||
2 | date: //*[(@class = "date")] | ||
3 | test_url: http://portertech.ca/2012/12/10/iac-morning-market/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/positioningmag.com.txt b/inc/3rdparty/site_config/standard/positioningmag.com.txt deleted file mode 100755 index f8eeb0a3..00000000 --- a/inc/3rdparty/site_config/standard/positioningmag.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //div[@id="newsDetailTitle"] | ||
2 | author: //span[@id="showAuthor"] | ||
3 | date: //span[@id="showRefDate"] | ||
4 | |||
5 | strip: //div[@id="breadcrumbs"] | ||
6 | strip: //span[@id="PageTitle"] | ||
7 | strip: //div[@id="newsDetailAuthorPublish"] | ||
8 | |||
9 | strip: //div[@class="leadPix"] | ||
10 | |||
11 | strip: //span[@id="ctl00_PageTitle"] | ||
12 | strip: //div[@id="newsDetailTitle"] | ||
13 | convert_double_br_tags:yes | ||
14 | |||
15 | strip: //div[@id="newsDetailCredential"] | ||
16 | strip: //div[@id="sidebar2"] | ||
17 | strip: //div[@id="footer"] | ||
18 | |||
19 | test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/post-gazette.com.txt b/inc/3rdparty/site_config/standard/post-gazette.com.txt deleted file mode 100755 index baa9d69d..00000000 --- a/inc/3rdparty/site_config/standard/post-gazette.com.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | title: //div[@class='story_headline'] | ||
2 | author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/') | ||
3 | date: //div[@class='story_lastupdate'] | ||
4 | body: //div[@id='story'] | ||
5 | strip: //div[@class='story_byline'] | ||
6 | strip: //div[@class='story_lastupdate'] | ||
7 | strip: //div[@class='story_headline'] | ||
8 | strip: //div[@id='abuse'] | ||
9 | strip: //h2 | ||
10 | strip: //div[@class='pagenumbers_wrap'] | ||
11 | strip: //ul[@class='pagenumbers'] | ||
12 | strip: //div[starts-with(., 'To report inappropriate comments')] | ||
13 | |||
14 | strip_id_or_class: story_share | ||
15 | strip_id_or_class: OUTBRAIN | ||
16 | strip_id_or_class: story_box_right | ||
17 | strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']] | ||
18 | strip: //ul[@id='pikame']/li[position()>1] | ||
19 | |||
20 | prune: no | ||
21 | tidy: no | ||
22 | |||
23 | single_page_link: //a[contains(@href, '?p=0')] | ||
24 | |||
25 | test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/ | ||
26 | test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/posta.com.tr.txt b/inc/3rdparty/site_config/standard/posta.com.tr.txt deleted file mode 100755 index 0f01149c..00000000 --- a/inc/3rdparty/site_config/standard/posta.com.tr.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: //div[@id='divAdnetKeyword']/h1 | ||
2 | body: //div[@id='_middle_content_bottom'] | ||
3 | |||
4 | wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img | ||
5 | |||
6 | strip: //div[@id='_middle_content_bottom_child1'] | ||
7 | strip: //div[@id='_middle_content_bottom_child4'] | ||
8 | strip: //div[@class='cls'] | ||
9 | strip: //div[@class='iphoneBox'] | ||
10 | strip: //ul[@class='ilgiliHaber'] | ||
11 | strip: //div[@class='yorumlar'] | ||
12 | strip: //div[@class='kategoriler'] | ||
13 | strip: //div[@class='textSize'] | ||
14 | strip: //span[@class='tarih'] | ||
15 | test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prb.org.txt b/inc/3rdparty/site_config/standard/prb.org.txt deleted file mode 100755 index 3952ea99..00000000 --- a/inc/3rdparty/site_config/standard/prb.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | date: /html/head/meta[@name="date"]/@content | ||
3 | body: //div[@id="featuredlinksbox"] | ||
4 | strip: //div[@class="relatedbox"] | ||
5 | strip: //h1 | ||
6 | strip: //br | ||
7 | strip_image_src: "/images" | ||
8 | test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt deleted file mode 100755 index 9a49557e..00000000 --- a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='left'] | ||
3 | strip: //h1 | ||
4 | convert_double_br_tags: yes | ||
5 | strip_id_or_class: entry-footer | ||
6 | strip: //h1[. = 'Previously']/following::* | ||
7 | author: string('James Hague') | ||
8 | date: //div[@class = 'entry-footer']/text() | ||
9 | test_url: http://prog21.dadgum.com/105.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prolost.com.txt b/inc/3rdparty/site_config/standard/prolost.com.txt deleted file mode 100755 index 82ebf6bb..00000000 --- a/inc/3rdparty/site_config/standard/prolost.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class='body'] | ||
2 | title: //h2[@class='title'] | ||
3 | date: //span[@class='posted-on'] | ||
4 | test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/propublica.org.txt b/inc/3rdparty/site_config/standard/propublica.org.txt deleted file mode 100755 index d141ac90..00000000 --- a/inc/3rdparty/site_config/standard/propublica.org.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@class="article-title"] | ||
2 | author: //meta[@name="author"]/@content | ||
3 | body: //div[@class="article-full"] | ||
4 | strip_id_or_class: sidebar_inject | ||
5 | strip_id_or_class: callout | ||
6 | strip_id_or_class: content-inset | ||
7 | strip_id_or_class: byline-block | ||
8 | strip_id_or_class: photo-caption | ||
9 | strip_id_or_class: foot-tools | ||
10 | |||
11 | test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prosa.dk.txt b/inc/3rdparty/site_config/standard/prosa.dk.txt deleted file mode 100755 index ba9ce8b8..00000000 --- a/inc/3rdparty/site_config/standard/prosa.dk.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //p[@class='name'] | ||
2 | date: substring-before(//p[@class='date'], ' | ') | ||
3 | body: //div[@class='news_single_item'] | ||
4 | test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt deleted file mode 100755 index 739d1b9e..00000000 --- a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | #basics | ||
2 | author: (//div[contains(@class,'author')])[1] | ||
3 | date: substring-before(//a[@class='issue'], '—') | ||
4 | #body://div[@class = 'entry'] | ||
5 | # use this until move_into support is ready | ||
6 | body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image'] | ||
7 | |||
8 | #moves header image and tagline into body | ||
9 | move_into(//div[@class='entry']/div)://div[@class = 'lead_image'] | ||
10 | move_into(//div[@class='entry']/div)://div[@class = 'standfirst'] | ||
11 | |||
12 | |||
13 | # moves author info to end of text | ||
14 | move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | # strips social links | ||
19 | strip_id_or_class:login-status | ||
20 | strip_id_or_class:shareinpost | ||
21 | strip_id_or_class:content_subscribe | ||
22 | strip_id_or_class:postinfo | ||
23 | strip_id_or_class:postutils | ||
24 | strip_id_or_class:comments | ||
25 | strip://strong[string(.) = 'Follow Prospect on Twitter'] | ||
26 | test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/protothema.gr.txt b/inc/3rdparty/site_config/standard/protothema.gr.txt deleted file mode 100755 index fae261b0..00000000 --- a/inc/3rdparty/site_config/standard/protothema.gr.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //a[contains(@rel, 'mainphotos')] | //div[contains(@class, 'article-content')] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.protothema.gr//politics/article/326464/diamadopoulou-floridis-kaminis-kai-boutaris-se-ekdilosi-ton-europaion-fileleutheron/ | ||
6 | test_url: http://www.protothema.gr/rss/news/politics/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/psychologytoday.com.txt b/inc/3rdparty/site_config/standard/psychologytoday.com.txt deleted file mode 100755 index 1bb63c29..00000000 --- a/inc/3rdparty/site_config/standard/psychologytoday.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@class="page-title"]/h1 | ||
2 | author: //a[@title="View Bio"] | ||
3 | date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by') | ||
4 | strip://div[@class="page-title"]/h1 | ||
5 | strip://div[@class="article-abstract"] | ||
6 | strip://div[@class="article-meta"] | ||
7 | strip://div[@id="rightColumn"] | ||
8 | strip://div[@id="inline-content-bottom-left"] | ||
9 | test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt deleted file mode 100755 index 8f32d7a4..00000000 --- a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //meta[@name="Author"]/@content | ||
2 | date: //meta[@name="Date"]/@content | ||
3 | strip: //h5 | ||
4 | test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/publico.pt.txt b/inc/3rdparty/site_config/standard/publico.pt.txt deleted file mode 100755 index bb6a05e1..00000000 --- a/inc/3rdparty/site_config/standard/publico.pt.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //span[@class="author"] | ||
3 | body: //article[@itemtype="http://schema.org/Article"] | ||
4 | date: //time[@itemprop="dateCreated"] | ||
5 | |||
6 | strip: //header[@class="entry-header single-header"] | ||
7 | strip: //aside[@class="entry-assets"] | ||
8 | strip: //div[@class="entry-options entry-options-above group"] | ||
9 | strip: //div[@class="entry-options entry-options-below group"] | ||
10 | |||
11 | convert_double_br_tags: yes | ||
12 | test_url: http://www.publico.pt/politica/noticia/passos-diz-que-se-limitacao-de-mandatos-fosse-para-todos-os-concelhos-estaria-claro-na-lei-1577691 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt deleted file mode 100755 index 0f1392a4..00000000 --- a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //div[@class='title'] | ||
2 | body: //div[@class='body'] | ||
3 | next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a | ||
4 | test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/qctimes.com.txt b/inc/3rdparty/site_config/standard/qctimes.com.txt deleted file mode 100755 index 3c3edfeb..00000000 --- a/inc/3rdparty/site_config/standard/qctimes.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | # this site seems to work OK in the web view, but only occasionally in the instapaper app itself. | ||
2 | |||
3 | body: //div[@class='entry-content'] | ||
4 | author: //span[@class='byline'] | ||
5 | test_url: http://qctimes.com/news/local/woman-faces-perjury-charges-in-meth-case/article_83f4c470-956a-11e2-a921-001a4bcf887a.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt deleted file mode 100755 index c17fb312..00000000 --- a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //div[contains(@class, "hentry")]/h3 | ||
2 | |||
3 | author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")] | ||
4 | |||
5 | date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under") | ||
6 | |||
7 | body: //div[contains(@class, "entry")] | ||
8 | |||
9 | strip_id_or_class: addtoany_share_save_container | ||
10 | strip_id_or_class: postmetadata | ||
11 | strip_id_or_class: author_bio | ||
12 | strip_id_or_class: author_bio_2 | ||
13 | strip: //div[contains(@class, "hentry")]/h3 | ||
14 | test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/queerty.com.txt b/inc/3rdparty/site_config/standard/queerty.com.txt deleted file mode 100755 index fc7ab37f..00000000 --- a/inc/3rdparty/site_config/standard/queerty.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='copy'] | ||
2 | title: //h1[@class='hed'] | ||
3 | test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quepasa.cl.txt b/inc/3rdparty/site_config/standard/quepasa.cl.txt deleted file mode 100755 index fb09a8f3..00000000 --- a/inc/3rdparty/site_config/standard/quepasa.cl.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | body: //div[@class="cuerpoArticulo"] | ||
4 | |||
5 | |||
6 | test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt deleted file mode 100755 index 732d12d7..00000000 --- a/inc/3rdparty/site_config/standard/quora.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | body: //div[contains(@class, 'main_col')] | ||
4 | title: //h1 | ||
5 | |||
6 | strip_id_or_class: hidden | ||
7 | strip_id_or_class: item_action_bar | ||
8 | strip_id_or_class: answer_voters | ||
9 | strip_id_or_class: question_topics | ||
10 | strip_id_or_class: answer_header_text | ||
11 | strip_id_or_class: editor_link | ||
12 | strip_id_or_class: view_tag | ||
13 | strip_id_or_class: include_details | ||
14 | strip_id_or_class: sig_edit | ||
15 | strip_id_or_class: profile_photo_img | ||
16 | |||
17 | test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/racjonalista.pl.txt b/inc/3rdparty/site_config/standard/racjonalista.pl.txt deleted file mode 100755 index 19c719d4..00000000 --- a/inc/3rdparty/site_config/standard/racjonalista.pl.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: /html/body/center/b | ||
2 | date: /html/body/table/tr[2]/td/i | ||
3 | single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1] | ||
4 | |||
5 | test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt deleted file mode 100755 index fa66b815..00000000 --- a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | date://span[@class='date'] | ||
2 | body://div[@class='entry-body'] | ||
3 | test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/radionz.co.nz.txt b/inc/3rdparty/site_config/standard/radionz.co.nz.txt deleted file mode 100755 index 2496ddab..00000000 --- a/inc/3rdparty/site_config/standard/radionz.co.nz.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='body'] | ||
2 | title: //div[@class='newsstory']/h2 | ||
3 | test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/randsinrepose.com.txt b/inc/3rdparty/site_config/standard/randsinrepose.com.txt deleted file mode 100755 index 6970a744..00000000 --- a/inc/3rdparty/site_config/standard/randsinrepose.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@id='center-col']/h4 | ||
2 | author: substring-before(//title,'In') | ||
3 | date: substring-after(//div[@class='commenttext']/span,'#') | ||
4 | body: //div[@id='center-col'] | ||
5 | strip: //div[@id='center-col']/h4 | ||
6 | strip: //div[@class='graytext'] | ||
7 | |||
8 | # Anthony Perez-Sanz 2012.3.14 | ||
9 | # Removed long gif from the end | ||
10 | strip: //img[@src='http://www.randsinrepose.com/spreader.gif'] | ||
11 | test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/readability.com.txt b/inc/3rdparty/site_config/standard/readability.com.txt deleted file mode 100755 index 2d5aba76..00000000 --- a/inc/3rdparty/site_config/standard/readability.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //link[@rel='canonical']/@href | ||
2 | |||
3 | test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/readwriteweb.com.txt b/inc/3rdparty/site_config/standard/readwriteweb.com.txt deleted file mode 100755 index e2aabda9..00000000 --- a/inc/3rdparty/site_config/standard/readwriteweb.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class="titlelink"] | ||
2 | date: //span[@class="timestamp"]/@data-published | ||
3 | body: //div[@class="asset-content"] | ||
4 | strip_id_or_class: related-entries | ||
5 | strip_id_or_class: like-and-retweet | ||
6 | |||
7 | author: //div[@id="submeta"]/a[1] | ||
8 | test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/real.gr.txt b/inc/3rdparty/site_config/standard/real.gr.txt deleted file mode 100755 index ce0a3c43..00000000 --- a/inc/3rdparty/site_config/standard/real.gr.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, 'article-photo-wrapper')] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.real.gr/DefaultArthro.aspx?page=arthro&id=360962&catID=1 | ||
5 | test_contains: Επισήμως το αποψινό υπουργικό | ||
diff --git a/inc/3rdparty/site_config/standard/recipe.com.txt b/inc/3rdparty/site_config/standard/recipe.com.txt deleted file mode 100755 index a01aaef4..00000000 --- a/inc/3rdparty/site_config/standard/recipe.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients'] | ||
2 | |||
3 | strip_id_or_class: location | ||
4 | strip_id_or_class: savings | ||
5 | strip_id_or_class: recipeDetailDescButton | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt deleted file mode 100755 index 0403ee86..00000000 --- a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='short-text' or starts-with(@id, 'news-id-')] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | |||
5 | test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt deleted file mode 100755 index ba342c7c..00000000 --- a/inc/3rdparty/site_config/standard/reddit.com.txt +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # This setup grabs the text from a Reddit self post. It ignores all comments etc. | ||
2 | |||
3 | title: //p[@class="title"]/a/text() | ||
4 | |||
5 | author: //p[@class="tagline"]/a | ||
6 | |||
7 | # this doesn't work for some reason...? | ||
8 | date: //p[@class="tagline"]//@datetime | ||
9 | |||
10 | body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1] | ||
11 | |||
12 | strip_id_or_class: tagline | ||
13 | strip_id_or_class: unvotable-message | ||
14 | strip_id_or_class: buttons | ||
15 | |||
16 | # follow the posted link (unless it's a self post - relative URL, no http://) | ||
17 | single_page_link: //p[@class="title"]/a[contains(@href, 'http://')] | ||
18 | |||
19 | test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ | ||
20 | test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ | ||
21 | test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/redmondpie.com.txt b/inc/3rdparty/site_config/standard/redmondpie.com.txt deleted file mode 100755 index 66cc1707..00000000 --- a/inc/3rdparty/site_config/standard/redmondpie.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //div[@class='posthead']//h2 | ||
2 | body: //div[contains(@class, 'postcontent') or @class='posthead'] | ||
3 | author: //div[@class='posthead']//a[@rel='author'] | ||
4 | |||
5 | strip: //div[@class='posthead']//h2 | ||
6 | replace_string(>Advertisements</div>): ></div> | ||
7 | replace_string(<p>You can follow us on): <p style="display:none;"> | ||
8 | strip_id_or_class: likeThisPost | ||
9 | |||
10 | prune: no | ||
11 | tidy: no | ||
12 | |||
13 | test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt deleted file mode 100755 index 8541a0d4..00000000 --- a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # Think there might be something up with your parser that it strips out 'print' from the title :) | ||
2 | |||
3 | title: //meta[@name='title']/@content | ||
4 | author: //meta[@name='author']/@content | ||
5 | date: //meta[@name='date']/@content | ||
6 | |||
7 | body: //div[@class='articleText'] | ||
8 | |||
9 | strip: //div[contains(@class, 'day')] | ||
10 | strip: //div[contains(@class, 'month')] | ||
11 | strip: //div[contains(@class, 'year')] | ||
12 | strip: //div[contains(@class, 'time')] | ||
13 | strip: //h1[@class='gl_headline'] | ||
14 | strip: //div[@class='byline'] | ||
15 | strip: //div[@id='left_ear'] | ||
16 | strip: //div[@id='right_ear'] | ||
17 | strip: //div[contains(@class, 'PopularPosts')] | ||
18 | strip ://div[@class='discuss_page_break'] | ||
19 | strip ://div[contains(@class, 'p-content_TagList')] | ||
20 | test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reflets.info.txt b/inc/3rdparty/site_config/standard/reflets.info.txt deleted file mode 100755 index 98a2bbfc..00000000 --- a/inc/3rdparty/site_config/standard/reflets.info.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body://div[@class='storycontent'] | ||
2 | date://div[@class='date'] | ||
3 | strip://li[@class='sharing_label'] | ||
4 | strip://a[@class='FlattrButton'] | ||
5 | test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/renenekuda.cz.txt b/inc/3rdparty/site_config/standard/renenekuda.cz.txt deleted file mode 100755 index a5361fd0..00000000 --- a/inc/3rdparty/site_config/standard/renenekuda.cz.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //*[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/resume.se.txt b/inc/3rdparty/site_config/standard/resume.se.txt deleted file mode 100755 index 17122a9b..00000000 --- a/inc/3rdparty/site_config/standard/resume.se.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | date: //meta[@name='bi3dPubDate']/@content | ||
2 | body: //div[contains(@class, 'articleBody')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.resume.se/nyheter/media/2013/09/18/kvallspress-och-tv-slass-om-playtittarna-men-youtube-ohotat-storst/ | ||
7 | test_url: http://www.resume.se/nyheter/media/2013/09/18/cecilia-blankens-lamnar-mama-for-konkurrent/ | ||
8 | test_url: http://www.resume.se/nyheter/reklam/2013/09/18/ravelli-trodde-jag-var-med-i-blasningen/ | ||
9 | test_url: http://www.resume.se/rss-nyheter \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt deleted file mode 100755 index a0a23940..00000000 --- a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | single_page_link://a[contains(@href, 'print')] | ||
2 | |||
3 | # Grab metadata from the "printer-friendly" page, after specifying single_page_link | ||
4 | title://h2 | ||
5 | date://cite | ||
6 | test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reuters.com.txt b/inc/3rdparty/site_config/standard/reuters.com.txt deleted file mode 100755 index 7411e62b..00000000 --- a/inc/3rdparty/site_config/standard/reuters.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1[@class='headline3'] | ||
2 | author: substring-after(//p[@class="byline"], 'By ') | ||
3 | date: //meta[@name="REVISION_DATE"]/@content | ||
4 | body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation'] | ||
5 | strip: //li[@class='next'] | ||
6 | strip: //span[@class='articleLocation'] | ||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt deleted file mode 100755 index 30e627dc..00000000 --- a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[@class="article_header"]/h3 | ||
2 | author: //div[@class="autor"]/p/* | ||
3 | date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ") | ||
4 | |||
5 | move_into(//div[@class="new_article"]): //div[@class="img_article"]/img | ||
6 | |||
7 | body: //div[@class="article_content"] | ||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt deleted file mode 100644 index 2093573b..00000000 --- a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='step-content'] | //div[@class='global-active ingredients-box'] | ||
2 | title: //div[@class='step-1-container'] | ||
3 | |||
4 | tidy: no | ||
5 | test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt deleted file mode 100755 index b0ee92dc..00000000 --- a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@id="post"] | ||
2 | strip: //div[@id="author-description"] | ||
3 | date: //span[@class="entry-date"] | ||
4 | author: //span[@class="author vcard"] | ||
5 | test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt deleted file mode 100755 index ed72915c..00000000 --- a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='post-body entry-content'] | ||
2 | strip: //div[@id='lws_0'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ritholtz.com.txt b/inc/3rdparty/site_config/standard/ritholtz.com.txt deleted file mode 100755 index d598479e..00000000 --- a/inc/3rdparty/site_config/standard/ritholtz.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class='post']/h2 | ||
2 | author: substring-before(substring-after(//div[@class='alignright']/small, 'By '),'-') | ||
3 | date: substring-after(//div[@class='alignright']/small, '-') | ||
4 | strip: //div[@class='alignleft'] | ||
5 | test_url: http://www.ritholtz.com/blog/2012/09/situational-awareness/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt deleted file mode 100755 index b0b90fb7..00000000 --- a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | strip_id_or_class: 'sharedaddy' | ||
2 | strip_id_or_class: 'respond' | ||
3 | strip_id_or_class: 'meta' | ||
4 | test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt deleted file mode 100755 index da5b7bd8..00000000 --- a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //section[@class='post text'] | ||
2 | title: //h1[@class='title'] | ||
3 | date: //p[@class='post-date'] | ||
4 | strip: //section[@class='meta-info'] | ||
5 | test_url: http://robots.thoughtbot.com/post/32455387133/four-phase-test \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt deleted file mode 100755 index f8c9541f..00000000 --- a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h2 | ||
2 | |||
3 | strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 | ||
4 | |||
5 | date: substring-after(//p[@class='info'], ' on ') | ||
6 | |||
7 | author: //p[@class='info']//a | ||
8 | test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt deleted file mode 100755 index eef8b11c..00000000 --- a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | author: //article/header/span[@class='author'] | ||
2 | title://article/header/h1 | ||
3 | body: //article | ||
4 | strip: //article/header | ||
5 | strip: //article/p[@class='metadata'] | ||
6 | footnotes: yes | ||
7 | test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rogerebert.com.txt b/inc/3rdparty/site_config/standard/rogerebert.com.txt deleted file mode 100755 index da215109..00000000 --- a/inc/3rdparty/site_config/standard/rogerebert.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: substring-before(//title,':') | ||
2 | author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY') | ||
3 | |||
4 | body: //div[@class='text'] | ||
5 | |||
6 | strip: //a[contains(@href,'printart')] | ||
7 | strip_id_or_class: enlarge_photo | ||
8 | test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt deleted file mode 100755 index 2365c42a..00000000 --- a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[contains(@class, 'inhoud')] | ||
2 | date: //span[@class ='published'] | ||
3 | author: //span[@class ='author'] | ||
4 | strip: //div[@class = 'grid_2'] | ||
5 | strip: //div[@class = 'block-citation-text'] | ||
6 | test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rollingstone.com.txt b/inc/3rdparty/site_config/standard/rollingstone.com.txt deleted file mode 100755 index 9a10a69e..00000000 --- a/inc/3rdparty/site_config/standard/rollingstone.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //h3[@class="byline"]/strong | ||
3 | |||
4 | body: //div[@id='main']/h2 | //div[@id='main']//div[@class='body'] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | single_page_link: //a[@class='print-page'] | ||
9 | |||
10 | test_url: http://www.rollingstone.com/politics/news/the-plastic-bag-wars-20110725 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt deleted file mode 100755 index ef327691..00000000 --- a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class='movie_content_area'] | ||
2 | strip_id_or_class: tomatometer_bar_help | ||
3 | strip_id_or_class: critic-links | ||
4 | strip_id_or_class: top-critics-numbers | ||
5 | strip_id_or_class: fan_side | ||
6 | strip_id_or_class: fblike | ||
7 | strip_id_or_class: rating_widget | ||
8 | strip_id_or_class: friend_reviews | ||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/roughtype.com.txt b/inc/3rdparty/site_config/standard/roughtype.com.txt deleted file mode 100755 index a012a67d..00000000 --- a/inc/3rdparty/site_config/standard/roughtype.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='content'] | ||
2 | strip: //p[@class='postmeta']/following::* | ||
3 | strip: //p[@class='postmeta'] | ||
4 | strip: //p[@align='left'] | ||
5 | test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt deleted file mode 100755 index 6ff03de8..00000000 --- a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_comments: no | ||
2 | test_url: http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rpgsite.net.txt b/inc/3rdparty/site_config/standard/rpgsite.net.txt deleted file mode 100755 index 9ddbf0f2..00000000 --- a/inc/3rdparty/site_config/standard/rpgsite.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id='news-text'] | ||
2 | prune: no | ||
3 | test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy | ||
4 | test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rubysfera.pl.txt b/inc/3rdparty/site_config/standard/rubysfera.pl.txt deleted file mode 100755 index d9d9a431..00000000 --- a/inc/3rdparty/site_config/standard/rubysfera.pl.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | author: //div[contains(@class, 'author_text')]/h4/text() | ||
2 | date: //li[@class='date'] | ||
3 | |||
4 | # stripping excessive tags | ||
5 | strip: //div[contains(@class, 'entry_meta')] | ||
6 | strip: //div[contains(@class, 'single_meta')] | ||
7 | strip: //br[contains(@class, 'clear')] | ||
8 | strip: //h3[contains(., 'Komentarz')] | ||
9 | test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ruhlman.com.txt b/inc/3rdparty/site_config/standard/ruhlman.com.txt deleted file mode 100755 index e54b0f0e..00000000 --- a/inc/3rdparty/site_config/standard/ruhlman.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: ///span[@class='author vcard'] | ||
3 | date: //abbr[@class='published'] | ||
4 | body: //div[@class='entry-content'] | ||
5 | |||
6 | test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ruttloff.org.txt b/inc/3rdparty/site_config/standard/ruttloff.org.txt deleted file mode 100755 index 43e130a4..00000000 --- a/inc/3rdparty/site_config/standard/ruttloff.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: //a[@class='author'] | ||
2 | tidy: no | ||
3 | test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/salon.com.txt b/inc/3rdparty/site_config/standard/salon.com.txt deleted file mode 100755 index 2b47f744..00000000 --- a/inc/3rdparty/site_config/standard/salon.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: (//span[@class="byline"]/a)[1] | ||
3 | date: //span[contains(@class, "toLocalTime")] | ||
4 | body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | # deal with singleton links | ||
9 | single_page_link: (//h1/a[contains(@href, '/singleton')])[1] | ||
10 | |||
11 | test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/salzburg.com.txt b/inc/3rdparty/site_config/standard/salzburg.com.txt deleted file mode 100755 index 464f99f1..00000000 --- a/inc/3rdparty/site_config/standard/salzburg.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //p[@class='teaser1 darkgrey myriad'] | ||
2 | move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear'] | ||
3 | strip: //div[@class='hidden'] | ||
4 | strip: //div[@id='article_related_source'] | ||
5 | |||
6 | test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt deleted file mode 100755 index 3f19cced..00000000 --- a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[contains(@class, 'post')]//h1 | ||
2 | date: //div[contains(@class, 'post')]//h6 | ||
3 | body: //div[contains(@class, 'entry')] | ||
4 | strip_id_or_class: post_stats | ||
5 | strip_id_or_class: related-posts | ||
6 | strip_id_or_class: after_story | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.sanpedrosun.com/community-and-society/2013/06/05/little-angelspre-school-talent-show/ | ||
10 | test_url: http://www.sanpedrosun.com/feed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/saveyourself.ca.txt b/inc/3rdparty/site_config/standard/saveyourself.ca.txt deleted file mode 100755 index 5a5605d9..00000000 --- a/inc/3rdparty/site_config/standard/saveyourself.ca.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | title://h1 | ||
2 | |||
3 | # my section divs seem to interfere with the Instapaper parser, so I ditch 'em | ||
4 | dissolve://div[contains(@class, 'section')] | ||
5 | |||
6 | #these don't seem to be necessary, but just in case | ||
7 | strip_id_or_class:'masthead' | ||
8 | strip_id_or_class:'footer' | ||
9 | |||
10 | #again, Instapaper seems to understand where my content is, but just in case | ||
11 | body://div[@id='content'] | ||
12 | |||
13 | # in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing | ||
14 | strip_id_or_class:'screen-only' | ||
15 | strip_id_or_class:'no-print' | ||
16 | |||
17 | #other misc removals and simplifications | ||
18 | strip_id_or_class:'popup' | ||
19 | strip_id_or_class:'ZoomSpin' | ||
20 | |||
21 | #I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes | ||
22 | wrap_in(blockquote)://div[contains(@class, 'sidebar')] | ||
23 | wrap_in(blockquote)://div[contains(@class, 'meta')] | ||
24 | wrap_in(blockquote)://p[contains(@class, 'meta')] | ||
25 | test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sayidaty.net.txt b/inc/3rdparty/site_config/standard/sayidaty.net.txt deleted file mode 100755 index 2d9f1884..00000000 --- a/inc/3rdparty/site_config/standard/sayidaty.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | date: //meta[@property='article:published_time']/@content | ||
2 | body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')] | ||
3 | |||
4 | test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sbnation.com.txt b/inc/3rdparty/site_config/standard/sbnation.com.txt deleted file mode 100755 index 41b36755..00000000 --- a/inc/3rdparty/site_config/standard/sbnation.com.txt +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | title: //h1[@id='stream_title'] | ||
2 | |||
3 | # Author and date don't work | ||
4 | author: //div[@class='byline'] | ||
5 | date: //div[@class='date-stamp'] | ||
6 | |||
7 | body: //div[@class='node-article'] | ||
8 | |||
9 | strip_id_or_class: fb-like-box | ||
10 | strip_id_or_class: stream-fb-like | ||
11 | strip_id_or_class: social-meta | ||
12 | strip_id_or_class: social-spoken | ||
13 | strip_id_or_class: twitter-share-button | ||
14 | strip_id_or_class: twitter-follow-button | ||
15 | strip_id_or_class: spinner_node_list | ||
16 | strip_id_or_class: node-sort-link | ||
17 | strip_id_or_class: stream_title | ||
18 | strip_id_or_class: stream_summary | ||
19 | strip_id_or_class: update-count-container | ||
20 | strip_id_or_class: major-updates | ||
21 | strip_id_or_class: newsletter-slide | ||
22 | strip_id_or_class: author-mini-profile | ||
23 | strip_id_or_class: byline | ||
24 | strip_id_or_class: header | ||
25 | strip_id_or_class: footer | ||
26 | |||
27 | # Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns | ||
28 | test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt deleted file mode 100755 index 0074a86a..00000000 --- a/inc/3rdparty/site_config/standard/schneier.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | author: //p[@class='mastname'] | ||
2 | |||
3 | body: //div[@class='indivbody'] | ||
4 | date: //div[@class='indivbody']/h2[1] | ||
5 | |||
6 | # Remove blog title. Specify first occurrence in case h1 is used in article | ||
7 | strip: //div[@class='indivbody']/h1[1] | ||
8 | |||
9 | # Remove blog description (the first p element) | ||
10 | strip: //div[@class='indivbody']/p[1] | ||
11 | |||
12 | # Remove navigation (second p element) | ||
13 | strip: //div[@class='indivbody']/p[2] | ||
14 | |||
15 | # Remove duplicate of article title. Specify first occurrence in case h3 is used in article | ||
16 | strip: //div[@class='indivbody']/h3[1] | ||
17 | |||
18 | # Remove publishing date, it's extracted by rule above | ||
19 | strip: //div[@class='indivbody']/h2[1] | ||
20 | |||
21 | # Remove duplicate of date at end, and newsletter signup | ||
22 | strip: //p[@class='posted'] | ||
23 | |||
24 | # Leave date at top | ||
25 | test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/science.orf.at.txt b/inc/3rdparty/site_config/standard/science.orf.at.txt deleted file mode 100755 index c4b21834..00000000 --- a/inc/3rdparty/site_config/standard/science.orf.at.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class="storybox"] | ||
2 | title: //div[@class="storybox"]//h1 | ||
3 | strip: //p[@class='metaline'] | ||
4 | date: substring-after(//*[@class='time'],'Erstellt am') | ||
5 | strip: //div[@class='fact'] | ||
6 | strip: //p[@class='backlink'] | ||
7 | strip: //div[@class='mailto'] | ||
8 | strip: //div[@id='forumDisclaimer'] | ||
9 | strip: //div[@class='forum'] | ||
10 | |||
11 | test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scienceblogs.de.txt b/inc/3rdparty/site_config/standard/scienceblogs.de.txt deleted file mode 100755 index b0dec3d2..00000000 --- a/inc/3rdparty/site_config/standard/scienceblogs.de.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a | ||
2 | |||
3 | author: //div[@class='details clear']//a[@class='hi'] | ||
4 | body: //div[@class='title'] | ||
5 | strip: //p[@class='entrypagination'] | ||
6 | strip: //p[@class='details_top'] | ||
7 | date: //p[@class='details_top'] | ||
8 | title: //div[@class='title']/h1 | ||
9 | strip: //p[@class='details'] | ||
10 | strip: //p[@class='details_bottom'] | ||
11 | |||
12 | test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scienceticker.info.txt b/inc/3rdparty/site_config/standard/scienceticker.info.txt deleted file mode 100755 index 2a06f734..00000000 --- a/inc/3rdparty/site_config/standard/scienceticker.info.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body: //div[@class='post'] | ||
2 | title: //h1[@id='singlePageTitle'] | ||
3 | date: substring-before(//small,'• Rubrik') | ||
4 | |||
5 | strip: //div[@class='post-ratings'] | ||
6 | strip: //div[@class='post-ratings-loading'] | ||
7 | strip: //a[@title='Empfehlen Sie den Text weiter!'] | ||
8 | strip: //a[@title='Drucken'] | ||
9 | strip: //div[@class='share'] | ||
10 | |||
11 | test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scientificamerican.com.txt b/inc/3rdparty/site_config/standard/scientificamerican.com.txt deleted file mode 100755 index 1b3f31cf..00000000 --- a/inc/3rdparty/site_config/standard/scientificamerican.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | # | ||
2 | # After site revisions at SciAm, this configuration does | ||
3 | # not work, especially for multi-page articles. For | ||
4 | # every article there is now a "Print" link which | ||
5 | # is far more reliable. So this configuration should be | ||
6 | # removed or disabled. | ||
7 | # 2/3/13 | ||
8 | # | ||
9 | |||
10 | # meta data | ||
11 | title://h1[@class = 'articleTitle'] | ||
12 | author:substring-after(//span[@class = 'byline'],'By ') | ||
13 | date:substring-before(//span[@class = 'datestamp'],'|') | ||
14 | |||
15 | #body content | ||
16 | body://div[@id = 'articleContent'] | ||
17 | #next_page_link://li[@id = 'flairPagination']/a[last()] | ||
18 | |||
19 | single_page_link: //a[contains(@href, 'print=true')] | ||
20 | |||
21 | #cleanup | ||
22 | strip://div[@class = 'fsgBooks'] | ||
23 | |||
24 | test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state | ||
25 | test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scilogs.de.txt b/inc/3rdparty/site_config/standard/scilogs.de.txt deleted file mode 100755 index b24d7844..00000000 --- a/inc/3rdparty/site_config/standard/scilogs.de.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@class='date']/a | ||
3 | date: substring-after(//div[@class='date'], ',') | ||
4 | body: //div[@class='entrybody'] | ||
5 | |||
6 | strip_id_or_class: socialshareprivacy | ||
7 | strip: //div[@class='entrybody']/br[1] | ||
8 | |||
9 | # Strip related articles | ||
10 | # 'p'-Tag strips 'Ähnliche Artikel: ' (<br> tags become <p>) | ||
11 | strip: //div[@class='entrybody']/p[last()] | ||
12 | strip: //div[@class='entrybody']/ul[last()] | ||
13 | |||
14 | convert_double_br_tags: yes | ||
15 | test_url: http://www.scilogs.de/wblogs/blog/formbar/fusion/2012-10-08/rundgang-durch-deutschlands-gr-tes-fusionsexperiment \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scotusblog.com.txt b/inc/3rdparty/site_config/standard/scotusblog.com.txt deleted file mode 100755 index 8881bb45..00000000 --- a/inc/3rdparty/site_config/standard/scotusblog.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //title | ||
2 | author: //p[@id='author-name-role']/a | ||
3 | date: substring-after(//p[@class='time'],'Posted') | ||
4 | body: //div[@id='main'] | ||
5 | strip: //div[@id='author-info'] | ||
6 | strip: //div[@id='author-links'] | ||
7 | strip: //h1 | ||
8 | test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scraplab.net.txt b/inc/3rdparty/site_config/standard/scraplab.net.txt deleted file mode 100755 index ca7ec195..00000000 --- a/inc/3rdparty/site_config/standard/scraplab.net.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class='body'] | ||
3 | test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scripting.com.txt b/inc/3rdparty/site_config/standard/scripting.com.txt deleted file mode 100755 index 5fb0ee79..00000000 --- a/inc/3rdparty/site_config/standard/scripting.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | strip: //a[starts-with(@href, '#')] | ||
2 | strip: //*[@class='storyByline'] | ||
3 | body: //*[@class='storyPageText']/.. | ||
4 | author: string('Dave Winer') | ||
5 | date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at') | ||
6 | title: //h1 | ||
7 | footnotes: no | ||
8 | test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sct.temple.edu.txt b/inc/3rdparty/site_config/standard/sct.temple.edu.txt deleted file mode 100755 index 55f24173..00000000 --- a/inc/3rdparty/site_config/standard/sct.temple.edu.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //*[@class="entry-content"] | ||
2 | title: //h1[@class="entry-title"] | ||
3 | date: //*[@class="entry-date"] | ||
4 | author: //*[@class="author vcard"] | ||
5 | test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt deleted file mode 100755 index dc98af3c..00000000 --- a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip: //ul[contains(@id, "social")] | ||
2 | strip: //div[contains(@class, "ts-fab-wrapper")] | ||
3 | strip: //div[contains(@id, 'gpt-ad')] | ||
4 | |||
5 | test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/ | ||
diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt deleted file mode 100755 index 9ccc5898..00000000 --- a/inc/3rdparty/site_config/standard/searchengineland.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article ') and (contains(concat(' ',normalize-space(@class),' '),' clear '))] | ||
2 | title: //div[@class="storyBox"]/h1 | ||
3 | author: //a[@rel="author"] | ||
4 | date: substring-before(//span[@class="dateline"], 'by') | ||
5 | |||
6 | #Removes related content but cleans up article text | ||
7 | strip: //h1 | ||
8 | strip: //p[@class="homeStory tdmSideInfo"] | ||
9 | strip: //div[@id="bylineShare"] | ||
10 | strip: //script | ||
11 | strip: //hr | ||
12 | |||
13 | strip_id_or_class: homeStory | ||
14 | strip_id_or_class: authorpic | ||
15 | strip_id_or_class: insideComments | ||
16 | strip_id_or_class: authorbio | ||
17 | strip_id_or_class: gpt-ad-sel-cube | ||
18 | strip_id_or_class: smxTextAd | ||
19 | |||
20 | test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt deleted file mode 100755 index 5129c069..00000000 --- a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h3[@class="storytitle"] | ||
2 | date: //div[@class='meta'] | ||
3 | body: //div[@class='storycontent'] | ||
4 | |||
5 | test_url: http://seattletransitblog.com/2012/06/19/times-st-louis-interested-in-buying-waterfront-streetcars/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sebbo.net.txt b/inc/3rdparty/site_config/standard/sebbo.net.txt deleted file mode 100755 index b6d9c92d..00000000 --- a/inc/3rdparty/site_config/standard/sebbo.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: substring-before(//title, '«') | ||
2 | body: //div[@class = 'entry'] | ||
3 | strip_id_or_class: 'postmetabox' | ||
4 | test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt deleted file mode 100755 index 6e98b149..00000000 --- a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://select.yeeyan.org/view/18312/332365 | ||
3 | # http://select.yeeyan.org/view/365295/333788 | ||
4 | # http://select.yeeyan.org/view/174464/332336 | ||
5 | |||
6 | tidy:no | ||
7 | prune:no | ||
8 | title://h1 | ||
9 | author: //div[@class='sa_author']/span/a | ||
10 | date: substring-after(//div[@class='sa_author']/span/following-sibling::span, ':') | ||
11 | body: //div[@class='sa_left closetag'] | ||
12 | wrap_in(b)://div[@class='sa_abstract'] | ||
13 | |||
14 | strip://ul[@class='sa_next clearfix'] | ||
15 | strip: //div[@class='sa_author'] | ||
16 | strip: //div[@class='sa_title_box'] | ||
17 | |||
18 | test_url: http://select.yeeyan.org/view/258033/333481 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/seriouseats.com.txt b/inc/3rdparty/site_config/standard/seriouseats.com.txt deleted file mode 100755 index 5e633470..00000000 --- a/inc/3rdparty/site_config/standard/seriouseats.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sf.curbed.com.txt b/inc/3rdparty/site_config/standard/sf.curbed.com.txt deleted file mode 100755 index 4c10e9c7..00000000 --- a/inc/3rdparty/site_config/standard/sf.curbed.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | author: //div[@class='post-byline']/a | ||
3 | date: substring-before(//div[@class='post-byline'], ', by') | ||
4 | |||
5 | body: //div[@class='post-body'] | ||
6 | dissolve: //noscript | ||
7 | test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sf.eater.com.txt b/inc/3rdparty/site_config/standard/sf.eater.com.txt deleted file mode 100755 index 1e7c85a0..00000000 --- a/inc/3rdparty/site_config/standard/sf.eater.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@class="post-title"] | ||
2 | author: //div[@class="post-byline"]/a | ||
3 | date: substring-before(//div[@class='post-byline'], ', by') | ||
4 | |||
5 | body: //div[@class='post-body'] | ||
6 | strip_id_or_class: post-kicker | ||
7 | test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sfgate.com.txt b/inc/3rdparty/site_config/standard/sfgate.com.txt deleted file mode 100755 index 54691122..00000000 --- a/inc/3rdparty/site_config/standard/sfgate.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: /html/head/title | ||
2 | |||
3 | body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')] | ||
4 | author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn'] | ||
5 | date: //div[@class = 'articleheadings']/span[@class = 'updated'] | ||
6 | strip: //div[div[contains(@class, 'imgbox')]] | ||
7 | |||
8 | body: //div[@class = 'blogitem'] | ||
9 | author: //p[@class="credit"]/span[@class="author"]/a[position() = 1] | ||
10 | date: //span[@class = 'pubdate'] | ||
11 | |||
12 | test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sfweekly.com.txt b/inc/3rdparty/site_config/standard/sfweekly.com.txt deleted file mode 100755 index 73c3017e..00000000 --- a/inc/3rdparty/site_config/standard/sfweekly.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[contains(@class, 'content_body')] | ||
2 | strip_id_or_class: det_rel | ||
3 | test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shabayek.com.txt b/inc/3rdparty/site_config/standard/shabayek.com.txt deleted file mode 100755 index 9a0d60ae..00000000 --- a/inc/3rdparty/site_config/standard/shabayek.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | date: //span[@class='date'] | ||
2 | body: //div[@class='post_content'] | ||
3 | test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shawnblanc.net.txt b/inc/3rdparty/site_config/standard/shawnblanc.net.txt deleted file mode 100755 index bd8438f7..00000000 --- a/inc/3rdparty/site_config/standard/shawnblanc.net.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title://*[@class='primary']/h1 | ||
2 | date: //*[@class='articledate'] | ||
3 | author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.') | ||
4 | body: //div[@class='primary'] | ||
5 | footnotes: yes | ||
6 | strip: //*[@class='primary']/h1 | ||
7 | strip: //*[@class='articledate'] | ||
8 | strip: //*[@class='detailsarticle'] | ||
9 | strip: //*[@class='endnav'] | ||
10 | strip: //*[@class='endmeta'] | ||
11 | test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shifteleven.com.txt b/inc/3rdparty/site_config/standard/shifteleven.com.txt deleted file mode 100755 index 43fd871d..00000000 --- a/inc/3rdparty/site_config/standard/shifteleven.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[ @class='entry-content' ] | ||
2 | |||
3 | strip: //div[ contains(@class, 'sharing') ] | ||
4 | |||
5 | date: //div[ @class='entry-meta' ]/a | ||
6 | test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/siasat.pk.txt b/inc/3rdparty/site_config/standard/siasat.pk.txt deleted file mode 100755 index b10e12de..00000000 --- a/inc/3rdparty/site_config/standard/siasat.pk.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/signalscv.com.txt b/inc/3rdparty/site_config/standard/signalscv.com.txt deleted file mode 100755 index 2d3c388e..00000000 --- a/inc/3rdparty/site_config/standard/signalscv.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | author: //span[contains(@class, 'byline_1')] | ||
2 | date: //span[@class='posted_date'] | ||
3 | body: //*[contains(@class, 'bigimage_container') or contains(@class, 'overlay_text') or contains(@id, 'articlebody')] | ||
4 | |||
5 | strip_id_or_class: leftWrapper | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.signalscv.com/section/46/article/102948/ | ||
10 | test_url: http://www.signalscv.com/syndication/feeds/rss/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/simonwillison.net.txt b/inc/3rdparty/site_config/standard/simonwillison.net.txt deleted file mode 100755 index 69999698..00000000 --- a/inc/3rdparty/site_config/standard/simonwillison.net.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, "entry")] | ||
2 | |||
3 | date: //div[contains(@class, "entryFooter")]/a | ||
4 | |||
5 | test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt deleted file mode 100755 index 46e2d5f2..00000000 --- a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@class='post-body'] | ||
2 | strip: //div[@id='lws_0'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/singularityhub.com.txt b/inc/3rdparty/site_config/standard/singularityhub.com.txt deleted file mode 100755 index 3999d4d4..00000000 --- a/inc/3rdparty/site_config/standard/singularityhub.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body://div[contains(@class,"entry-content")] | ||
2 | test_url: http://singularityhub.com/2011/05/21/google-invades-your-home-android-phones-control-your-appliances-and-accessories-video/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt deleted file mode 100755 index 0d05c40e..00000000 --- a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@class='headline']//h2 | ||
2 | body: //div[contains(@class, 'storycontent')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sivers.org.txt b/inc/3rdparty/site_config/standard/sivers.org.txt deleted file mode 100755 index a88f30d7..00000000 --- a/inc/3rdparty/site_config/standard/sivers.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //article[@class='post']/header[@class='wrapper']//h1/a | ||
2 | author: //header[@id='masthead']//h1/a | ||
3 | date: //article[@class='post']/header[@class='wrapper']//p[@class='postdate'] | ||
4 | body: //div[@id='body-content'] | ||
5 | |||
6 | test_url: http://sivers.org/delegate/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/skanesfria.se.txt b/inc/3rdparty/site_config/standard/skanesfria.se.txt deleted file mode 100755 index a0ddac79..00000000 --- a/inc/3rdparty/site_config/standard/skanesfria.se.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.skanesfria.se/artikel/112045 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slashfilm.com.txt b/inc/3rdparty/site_config/standard/slashfilm.com.txt deleted file mode 100755 index 4d17176a..00000000 --- a/inc/3rdparty/site_config/standard/slashfilm.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: substring-before(//title,'| /Film') | ||
2 | date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by') | ||
3 | strip: //div[@class='pm-left'] | ||
4 | strip: //div[@class='pm-right'] | ||
5 | strip: //h2/span | ||
6 | next_page_link: //h2/strong/a | ||
7 | strip: //h2/strong/a | ||
8 | strip: //p[contains(text(),'we have to split this post over')] | ||
9 | strip: //p[@class='post-info'] | ||
10 | strip: //h1/a | ||
11 | strip: //img[contains(@src,'siteimages/authors')] | ||
12 | strip: //div[@id='header'] | ||
13 | strip: //div[@class='topad-right'] | ||
14 | strip: //strong[contains(text(),'Cool Posts From Around the Web:')] | ||
15 | test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt deleted file mode 100755 index d5798e01..00000000 --- a/inc/3rdparty/site_config/standard/slate.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | title: //h1[@class="sl-art-head-dek"] | ||
2 | body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] | ||
3 | strip: //div[@class="department_kicker"] | ||
4 | strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] | ||
5 | strip: //div[@id="bottom_sponsored_links"] | ||
6 | strip: //div[@class="sl-art-ad-midflex"] | ||
7 | #strip: //dl | ||
8 | #strip: //p[em/a[contains(@href, 'facebook.com')]] | ||
9 | prune: no | ||
10 | |||
11 | author: //div[@id='author_bio']//a[contains(@href, '/author/')] | ||
12 | author: //a[contains(@href, '/authors.')] | ||
13 | |||
14 | date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') | ||
15 | |||
16 | single_page_link: //a[@class='sl-art-sinpage'] | ||
17 | |||
18 | test_url: http://www.slate.com/id/2274583/pagenum/all/ | ||
19 | test_url: http://www.slate.com/id/2293116/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt deleted file mode 100755 index e62a3966..00000000 --- a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt deleted file mode 100755 index f9526945..00000000 --- a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | strip_id_or_class: postCategory | ||
2 | title: //h3[@class='postTitle'] | ||
3 | body: //div[@class='postBody'] | ||
4 | test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smartinvestor.de.txt b/inc/3rdparty/site_config/standard/smartinvestor.de.txt deleted file mode 100755 index 85ca46de..00000000 --- a/inc/3rdparty/site_config/standard/smartinvestor.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //td[@class='hweissblau2'] | ||
2 | body: //p[@class='copy'] | //div[@class='Section1'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sme.sk.txt b/inc/3rdparty/site_config/standard/sme.sk.txt deleted file mode 100755 index d41612cc..00000000 --- a/inc/3rdparty/site_config/standard/sme.sk.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | date: //p[@class='autor_line']/b/text() | ||
3 | test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt deleted file mode 100755 index 3e8fee95..00000000 --- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # meta data | ||
2 | title://h1[@id = 'articleTitle'] | ||
3 | author:substring-after(//ul[@id = 'byLine']/li[1],'By ') | ||
4 | date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') | ||
5 | body://div[@id = 'article-body'] | ||
6 | |||
7 | # full content | ||
8 | single_page_link://td/li[@class = 'article-singlepage']/a | ||
9 | |||
10 | # caption clean up | ||
11 | wrap_in(i)://span[@class='articleImageCaptionwide'] | ||
12 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p | ||
13 | |||
14 | |||
15 | # clean up | ||
16 | strip://p[@id = 'articlePaginationWrapper'] | ||
17 | strip://ul[contains(@class, 'cat-breadcrumb')] | ||
18 | strip://div [@class= 'viewMorePhotos'] | ||
19 | |||
20 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smokingapples.com.txt b/inc/3rdparty/site_config/standard/smokingapples.com.txt deleted file mode 100755 index c68c1321..00000000 --- a/inc/3rdparty/site_config/standard/smokingapples.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h2[@class='custom-entry-title'] | ||
2 | author: substring-after(//span[@class='author vcard'],'by ') | ||
3 | date: substring-after(//span[@class='publ'],'Published on ') | ||
4 | body: //div[@class='postentry-content'] | ||
5 | test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/somethingawful.com.txt b/inc/3rdparty/site_config/standard/somethingawful.com.txt deleted file mode 100755 index 48547948..00000000 --- a/inc/3rdparty/site_config/standard/somethingawful.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id = 'content-area'] | ||
3 | author: //p[contains(@class, 'byline')]/a | ||
4 | autodetect_next_page: yes | ||
5 | tidy: no | ||
6 | |||
7 | strip_id_or_class: articleid | ||
8 | strip_id_or_class: logo | ||
9 | strip_id_or_class: pagebar | ||
10 | strip_id_or_class: featurenavlinks | ||
11 | strip_id_or_class: featured_frontpage | ||
12 | strip_id_or_class: sidebar | ||
13 | strip_id_or_class: footer | ||
14 | strip_id_or_class: byline | ||
15 | strip_id_or_class: logo | ||
16 | strip_id_or_class: nav_network | ||
17 | test_url: http://www.somethingawful.com/d/dungeons-and-dragons/wtf-monster-manual.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/songshuhui.net.txt b/inc/3rdparty/site_config/standard/songshuhui.net.txt deleted file mode 100755 index a9233593..00000000 --- a/inc/3rdparty/site_config/standard/songshuhui.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://songshuhui.net/archives/65522 | ||
3 | # http://songshuhui.net/archives/75760 | ||
4 | title://h2/span/a | ||
5 | date:substring-before(substring-after(//div[@class='atrctitle']/div, '发表于'),' |') | ||
6 | body://div[@class='entry'] | ||
7 | test_url: http://songshuhui.net/archives/74819 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sourcebooks.com.txt b/inc/3rdparty/site_config/standard/sourcebooks.com.txt deleted file mode 100755 index 86e3df5e..00000000 --- a/inc/3rdparty/site_config/standard/sourcebooks.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | #grab the actual content div | ||
2 | body: //div[@class='rt-article'] | ||
3 | |||
4 | test_url: http://www.sourcebooks.com/blog/happy-27th-birthday-sourcebooks.html | ||
diff --git a/inc/3rdparty/site_config/standard/spectator.co.uk.txt b/inc/3rdparty/site_config/standard/spectator.co.uk.txt deleted file mode 100755 index d0605ed2..00000000 --- a/inc/3rdparty/site_config/standard/spectator.co.uk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text() | ||
2 | |||
3 | body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body'] | ||
4 | |||
5 | # Not very helpful: the title and author are contained by the same element that contains the body | ||
6 | strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link'] | ||
7 | test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt deleted file mode 100755 index aea3627e..00000000 --- a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body://div[@class="articleBody"] | ||
2 | author://p[@class="articleBodyTtl"] | ||
3 | test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/speirs.org.txt b/inc/3rdparty/site_config/standard/speirs.org.txt deleted file mode 100755 index 3bf859e3..00000000 --- a/inc/3rdparty/site_config/standard/speirs.org.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body://div[@class="body"] | ||
2 | test_url: http://speirs.org/blog/2011/5/5/ipad-trials-at-oklahoma-state.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt deleted file mode 100755 index 413e0155..00000000 --- a/inc/3rdparty/site_config/standard/spiegel.de.txt +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
1 | # A. Niepel, narya.de@... | ||
2 | # - added single_page_link | ||
3 | # - added author for default and single page view | ||
4 | # - added date for single page view | ||
5 | # fforst@... | ||
6 | # - Fixed it | ||
7 | # bode2104@... | ||
8 | # - Fixed single_page_link | ||
9 | # - Included intro text in single page view | ||
10 | # - Added body in default view | ||
11 | |||
12 | # set body | ||
13 | tidy: no | ||
14 | # body in single page view | ||
15 | body: //div[@id="spArticleContent"] | ||
16 | # body in default view | ||
17 | body: //div[@id="spArticleSection"] | ||
18 | # body in "Fotostrecke" | ||
19 | body: //div[@id="spBigaContent"] | ||
20 | |||
21 | # set date in single page view | ||
22 | date: //div[@id="spArticleContent"]/h3 | ||
23 | # strip date | ||
24 | strip: //div[@id="spArticleContent"]/h3 | ||
25 | # set date in "Fotostrecke" | ||
26 | date: //div[@id="spBigaDatum"] | ||
27 | |||
28 | #set title in single page view | ||
29 | title: //div[@id='spArticleContent']/h2 | ||
30 | # strip title | ||
31 | strip: //div[@id='spArticleContent']/h1 | ||
32 | strip: //div[@id='spArticleContent']/h2 | ||
33 | #set title in "Fotostrecke" | ||
34 | title: //div[@class='spBigaHeadline'] | ||
35 | |||
36 | # set author | ||
37 | author: //p[@class="spAuthor"]/a | ||
38 | author: substring-after(//p[@class="spAuthor"], 'Von ') | ||
39 | # strip author | ||
40 | strip: //p[@class='spAuthor'] | ||
41 | |||
42 | # remove captions | ||
43 | strip: //*/span[@class='spPicLayerText'] | ||
44 | strip: //*/div[@class='spPanoPlayerPaneControl'] | ||
45 | strip: //*/div[@class='spCredit'] | ||
46 | strip: //*/div[@class='spCredit']/following-sibling::p | ||
47 | |||
48 | # remove ads | ||
49 | strip: //div[@class='spMInline'] | ||
50 | |||
51 | # remove photogalleries and extras | ||
52 | strip: //div[@class='spPhotoGallery'] | ||
53 | strip: //div[@class='spPhotoGallery']/following-sibling::br | ||
54 | strip: //div[@class='spAssetAlignleft'] | ||
55 | strip: //div[contains(@class,'spAsset')] | ||
56 | strip: //br[@clear='all'] | ||
57 | |||
58 | # remove community functions | ||
59 | strip: //div[@id='spSocialBookmark'] | ||
60 | strip: //div[contains(@class, 'spCommunityBox')] | ||
61 | strip: //div[contains(@class, 'spArticleNewsfeedBox')] | ||
62 | strip: //div[@class='spArticleCredit'] | ||
63 | |||
64 | # remove clutter in "Fotostrecke" | ||
65 | strip: //div[@id='spBreadcrumb'] | ||
66 | strip: //div[@id='spBigaLatestEntries'] | ||
67 | strip: //div[contains(@class, 'spBigaNavi')] | ||
68 | strip: //div[@class='spDottedLine'] | ||
69 | |||
70 | # Use link to print article for single page view | ||
71 | single_page_link: //a[contains(@href, '-druck')] | ||
72 | |||
73 | # use next link in "Fotostrecke" | ||
74 | next_page_link: //a[@class='spBigaControlForw'] | ||
75 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spiked-online.com.txt b/inc/3rdparty/site_config/standard/spiked-online.com.txt deleted file mode 100755 index 7ec39c2b..00000000 --- a/inc/3rdparty/site_config/standard/spiked-online.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //div[@id='articleTitleWrapper' or @id='mainFeature']//h1 | ||
2 | author: //*[@id='authorNameJob']//a | ||
3 | date: //div[@id='articleMeta']/p | ||
4 | body: //div[@id='mainFeature']//img | //div[contains(@class, 'fullText')] | ||
5 | |||
6 | test_url: http://www.spiked-online.com/newsite/article/standing_up_to_the_white-coated_gods_of_fortune/13785 | ||
7 | test_url: http://www.spiked-online.com/newsite/article/sex_box_and_the_crisis_of_intimacy/14168 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spin.com.txt b/inc/3rdparty/site_config/standard/spin.com.txt deleted file mode 100755 index 88eb454c..00000000 --- a/inc/3rdparty/site_config/standard/spin.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | tidy: no | ||
2 | body: //section[contains(@class, 'main')] | ||
3 | strip: //footer | ||
4 | strip: //a[@class='paginated'] | ||
5 | test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/splatf.com.txt b/inc/3rdparty/site_config/standard/splatf.com.txt deleted file mode 100755 index 3e05a225..00000000 --- a/inc/3rdparty/site_config/standard/splatf.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author:string('Dan Frommer/SplatF') | ||
2 | date://div[@class='postdate'] | ||
3 | body://div[@class='entry'] | ||
4 | title://div[@class='post']/h1 | ||
5 | test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/splitsider.com.txt b/inc/3rdparty/site_config/standard/splitsider.com.txt deleted file mode 100755 index 4bbc7aac..00000000 --- a/inc/3rdparty/site_config/standard/splitsider.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //div[@class='byline']/a | ||
2 | date: //div[@id='date'] | ||
3 | body: //div[@class='entry'] | ||
4 | test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sport.detik.com.txt b/inc/3rdparty/site_config/standard/sport.detik.com.txt deleted file mode 100755 index 18552d1e..00000000 --- a/inc/3rdparty/site_config/standard/sport.detik.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title://div[@class="content_detail"]/h1 | ||
2 | |||
3 | author://div[@class="author"]/strong | ||
4 | |||
5 | date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB') | ||
6 | |||
7 | body://div[@class='text_detail'] | ||
8 | test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sport.orf.at.txt b/inc/3rdparty/site_config/standard/sport.orf.at.txt deleted file mode 100755 index f0be85c7..00000000 --- a/inc/3rdparty/site_config/standard/sport.orf.at.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sport365.fr.txt b/inc/3rdparty/site_config/standard/sport365.fr.txt deleted file mode 100755 index 8688f40b..00000000 --- a/inc/3rdparty/site_config/standard/sport365.fr.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //h2[contains(@class, 'body_head')] | //div[@id='img_article' or contains(@class, 'body_content')] | ||
2 | body: //div[contains(@class, 'cpanel')]//div[contains(@class, 'thumbnails')] | ||
3 | prune: no | ||
4 | strip: //div[starts-with(@class, 'actu_')] | ||
5 | strip: //div[contains(@class, 'data')] | ||
6 | |||
7 | test_url: http://www.sport365.fr/basketball/nba/new-york-accord-avec-toronto-pour-bargnani-1038773.shtml | ||
8 | test_url: http://www.sport365.fr/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt deleted file mode 100755 index 8c21ef2b..00000000 --- a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
2 | body: //div[contains(@class, 'article')] | ||
3 | strip: //div[contains(@class, 'mod-inline')] | ||
4 | strip: //*/span[@class='page-actions']/a | ||
5 | strip: //*/span[@class='page-actions']/a | ||
6 | strip: //div[@class='page-actions']/* | ||
7 | strip: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
8 | strip: //div[@class='mod-blog-navigation'] | ||
9 | strip: //div[@class='monthday'] | ||
10 | strip: //div[@class='time'] | ||
11 | strip: //div[@class='timeofday'] | ||
12 | test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt deleted file mode 100755 index b0f57e2c..00000000 --- a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //div[@id='article']/div[@class='hd']/h1 | ||
2 | body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0'] | ||
3 | strip: //div[@class='foot'] | ||
4 | strip: //div[@id='sidebar']//div[@class='ft'] | ||
5 | strip: //p[@id='byline']//em | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sportschau.de.txt b/inc/3rdparty/site_config/standard/sportschau.de.txt deleted file mode 100755 index 1e58b520..00000000 --- a/inc/3rdparty/site_config/standard/sportschau.de.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title://div[@id='ardContent']/h1 | ||
2 | |||
3 | author://p[@id='ardAutor'] | ||
4 | author://span[@id='ardQuelle'] | ||
5 | author:string('sportschau.de') | ||
6 | |||
7 | date:substring-after(//span[@id='ardStand'], 'Stand: ') | ||
8 | |||
9 | body://div[@id='ardContent'] | ||
10 | |||
11 | strip://div[@id='ardContent']/h1 | ||
12 | strip://p[@id='ardAutor'] | ||
13 | strip: //div[@class='embeddedPlayer_clipinfo'] | ||
14 | strip: //div[@class='ardMehrZumThemaRechts'] | ||
15 | strip: //*[contains(@class, 'inv')] | ||
16 | |||
17 | strip: //p[@id='ardAbbinder'] | ||
18 | strip: //div[@class='socialBookmarks'] | ||
19 | strip: //div[@id='ardContentEnd'] | ||
20 | strip: //div[@id='ardDisclaimer'] | ||
21 | strip: //div[@id='ardRechteSpalte'] | ||
22 | test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt deleted file mode 100755 index b3da8138..00000000 --- a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | # main sportsillustrated.com articles | ||
2 | # | ||
3 | body: //div[@id="cnnStoryContent"] | ||
4 | title: //div[@id="cnnStoryHeadline"]//h1 | ||
5 | author: //div[@id="cnnSubBanner"]//strong | ||
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | ||
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | ||
8 | |||
9 | # kill ugly font buttons | ||
10 | strip: //div[@id="cnnSCFontButtons"] | ||
11 | |||
12 | # kill misc filler videos & etc | ||
13 | strip: //div[@class="cnnDivideContent"] | ||
14 | strip: //*[@class="cnnTMbox"] | ||
15 | |||
16 | # si vault articles | ||
17 | # ------------- | ||
18 | body: //div[@class="siv_artPara"] | ||
19 | title: //div[@class="siv_artHeader"]//h1 | ||
20 | author: //div[@class="byline"] | ||
21 | date: //div[@class="date"] | ||
22 | |||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | ||
24 | strip_id_or_class: cnnstorypagination | ||
25 | |||
26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sprengsatz.de.txt b/inc/3rdparty/site_config/standard/sprengsatz.de.txt deleted file mode 100755 index 5b683811..00000000 --- a/inc/3rdparty/site_config/standard/sprengsatz.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h2 | ||
2 | author: string('Michael Spreng') | ||
3 | date: //div[@class='date'] | ||
4 | body: //div[@class='entry'] | ||
5 | test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sqlite.org.txt b/inc/3rdparty/site_config/standard/sqlite.org.txt deleted file mode 100755 index 15763c32..00000000 --- a/inc/3rdparty/site_config/standard/sqlite.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@id='ff-body'] | ||
2 | |||
3 | replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center> | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt deleted file mode 100755 index 8eae13ed..00000000 --- a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class='content'] | ||
2 | date: substring-before( //div[@class='unit dateAndNotes'], 'with') | ||
3 | title: //h3 | ||
4 | test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stackoverflow.com.txt b/inc/3rdparty/site_config/standard/stackoverflow.com.txt deleted file mode 100755 index bb95e93a..00000000 --- a/inc/3rdparty/site_config/standard/stackoverflow.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2 | ||
2 | |||
3 | replace_string(<div class="user-details"><br></div>): <!-- nothing --> | ||
4 | replace_string(<div class="vote">): <div class="vote"><h3>Vote count: | ||
5 | |||
6 | strip_id_or_class: vote-up | ||
7 | strip_id_or_class: vote-down | ||
8 | strip_id_or_class: star-off | ||
9 | strip_id_or_class: favoritecount | ||
10 | strip_id_or_class: -share | ||
11 | strip_id_or_class: badgecount | ||
12 | |||
13 | |||
14 | test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt deleted file mode 100755 index a0f1587c..00000000 --- a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //div[@class='articleLeft']/h3 | ||
2 | |||
3 | author: substring-after(//span[@class='articleAuthor']/a,'By ') | ||
4 | |||
5 | date: substring-before(//span[@class='articleDateTime'],'in ') | ||
6 | |||
7 | body: //div[@class='articleLeft'] | ||
8 | strip: //div[@class='articleMoreNews'] | ||
9 | strip: //div[@class='articleLeft']/h3 | ||
10 | strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix'] | ||
11 | |||
12 | # Remove duplicate title from text | ||
13 | strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3 | ||
14 | test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/standard.co.uk.txt b/inc/3rdparty/site_config/standard/standard.co.uk.txt deleted file mode 100755 index 71a2bda1..00000000 --- a/inc/3rdparty/site_config/standard/standard.co.uk.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | autodetect_next_page: no | ||
2 | footnotes: no | ||
3 | dissolve: //div[@class="column-2"]//div[@class="widget"] | ||
4 | dissolve: //div[@class="column-2"]//div | ||
5 | |||
6 | author: //div[@class="innerbyline"]/a | ||
7 | strip: //div[@class="innerbyline"]/a | ||
8 | |||
9 | strip: //p[@class="dateline"] | ||
10 | date: //p[@class="dateline"] | ||
11 | |||
12 | title: //h1[@class="title"] | ||
13 | author: //div[@class="innerbyline"]/a | ||
14 | date: //p[@class="dateline"] | ||
15 | body: //div[@class="column-2"] | ||
16 | test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/staradvertiser.com.txt b/inc/3rdparty/site_config/standard/staradvertiser.com.txt deleted file mode 100755 index 254e2c2b..00000000 --- a/inc/3rdparty/site_config/standard/staradvertiser.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@id='storyTitle'] | ||
2 | author: substring-after(//span[@class='hsa_postCredit'], 'By ') | ||
3 | date://span[@class='hsa_dateStamp'] | ||
4 | body: //div[@class='storytext'] | ||
5 | strip_id_or_class: insideStoryAd | ||
6 | strip_id_or_class: printDesc | ||
7 | strip_id_or_class: sb_2010_story_tools | ||
8 | strip_id_or_class: FBConnectButton_Text | ||
9 | strip_id_or_class: breadcrumbs | ||
10 | prune: no | ||
11 | test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stephenfry.com.txt b/inc/3rdparty/site_config/standard/stephenfry.com.txt deleted file mode 100755 index efd1ec2b..00000000 --- a/inc/3rdparty/site_config/standard/stephenfry.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: /html/head/meta[@name='title']/@content | ||
2 | author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a | ||
3 | date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')] | ||
4 | |||
5 | body: //div[@class='entry-content'] | ||
6 | |||
7 | single_page_link: //p[@class='pagination']/a | ||
8 | test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stlbeacon.org.txt b/inc/3rdparty/site_config/standard/stlbeacon.org.txt deleted file mode 100755 index 75379a9c..00000000 --- a/inc/3rdparty/site_config/standard/stlbeacon.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: article/h1 | ||
2 | author: //p[@class='byline'] | ||
3 | date: //p[@class='date'] | ||
4 | body: //div[@class='body'] | ||
5 | test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt deleted file mode 100755 index 2f4f8cb8..00000000 --- a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | strip_id_or_class: 'left' | ||
2 | strip_id_or_class: 'right' | ||
3 | strip_id_or_class: 'block-belowcontent' | ||
4 | |||
5 | test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt deleted file mode 100755 index cc8c28b8..00000000 --- a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.stockholmsfria.nu/artikel/112068 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/straightdope.com.txt b/inc/3rdparty/site_config/standard/straightdope.com.txt deleted file mode 100755 index f01d7ad1..00000000 --- a/inc/3rdparty/site_config/standard/straightdope.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='article' or @id='current_illustration'] | ||
2 | title: //div[@id='article']//h1 | ||
3 | date: //div[@id='article']//div[@class='date'] | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.straightdope.com/columns/read/947/whatever-happened-to-adoption-of-the-metric-system-in-the-u-s \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/streetsblog.net.txt b/inc/3rdparty/site_config/standard/streetsblog.net.txt deleted file mode 100755 index 6cf03ca6..00000000 --- a/inc/3rdparty/site_config/standard/streetsblog.net.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h2[@class="post-title"] | ||
2 | date: //span[@class="post-date"] | ||
3 | body: //div[@class="post-entry"] | ||
4 | |||
5 | #This is also good for *.streetsblog.org, for example: | ||
6 | #http://dc.streetsblog.org/2011/10/21/friday-job-market/ | ||
7 | test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stuff.co.nz.txt b/inc/3rdparty/site_config/standard/stuff.co.nz.txt deleted file mode 100755 index 3756092c..00000000 --- a/inc/3rdparty/site_config/standard/stuff.co.nz.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title://div[@id='left_col']/h1 | ||
2 | author:substring-after(//span[contains(@class,'storycredit')],'BY ') | ||
3 | author://span[contains(@class,'storycredit')] | ||
4 | date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ') | ||
5 | date://div[contains(@class,'toolbox_date')] | ||
6 | body://div[@id='left_col'] | ||
7 | |||
8 | strip_id_or_class: toolbox | ||
9 | strip_id_or_class: story_features | ||
10 | strip_id_or_class: sharebox_new | ||
11 | strip_id_or_class: related_box | ||
12 | strip_id_or_class: sponsored_links | ||
13 | strip_id_or_class: hidden_ad | ||
14 | strip_id_or_class: story_content_top | ||
15 | strip_id_or_class: total_number | ||
16 | strip_id_or_class: sort_order | ||
17 | strip_id_or_class: subscribe_order | ||
18 | |||
19 | strip://div[contains(@class,'ad_story')] | ||
20 | |||
21 | test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge | ||
22 | test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stumbleupon.com.txt b/inc/3rdparty/site_config/standard/stumbleupon.com.txt deleted file mode 100755 index 9adc3c50..00000000 --- a/inc/3rdparty/site_config/standard/stumbleupon.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link: //iframe[@id='tb-stumble-frame']/@src | ||
2 | |||
3 | test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/subtraction.com.txt b/inc/3rdparty/site_config/standard/subtraction.com.txt deleted file mode 100755 index 9ba6eb77..00000000 --- a/inc/3rdparty/site_config/standard/subtraction.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //*[@id='posts']/div[1]/h2 | ||
2 | author: //*[@id='posts']/div[1]/div[2]/span[2]/a | ||
3 | date: //*[@class='date'] | ||
4 | body: //div[@class='body-lead'] | ||
5 | |||
6 | # take out the bit saying 'body' | ||
7 | strip: //div[@class='body-lead']/div[@class='info-label'] | ||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt deleted file mode 100755 index 74b8d451..00000000 --- a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... | ||
2 | |||
3 | single_page_link: //a[ contains( @href, "/2.220/" ) ] | ||
4 | |||
5 | body: //article[@id="sitecontent"]/section[@class="body"] | ||
6 | author: //address[@class="author"] | ||
7 | date: //div[@class="header"]//h1//span[@class="updated"] | ||
8 | wrap_in(small): //div[@class="footer"] | ||
9 | wrap_in(i): //figcaption/h3 | ||
10 | dissolve: //figcaption//h3 | ||
11 | dissolve: //figure/div[@class="body"] | ||
12 | dissolve: //figure/a | ||
13 | |||
14 | strip: //figure[ not( contains(@class, "zoomimage" ) ) ] | ||
15 | strip: //div[@data-onlineonly="true"] | ||
16 | strip: //address[@class="author"] | ||
17 | |||
18 | test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/summify.com.txt b/inc/3rdparty/site_config/standard/summify.com.txt deleted file mode 100755 index 1128e1bb..00000000 --- a/inc/3rdparty/site_config/standard/summify.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_id_or_class: toolbar | ||
2 | test_url: http://summify.com/story/Tmt1YQ0JBgKTAHwK/www.nybooks.com/articles/archives/2003/jan/16/fixed-opinions-or-the-hinge-of-history/?pagination=false \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/suntimes.com.txt b/inc/3rdparty/site_config/standard/suntimes.com.txt deleted file mode 100755 index 6d4594cf..00000000 --- a/inc/3rdparty/site_config/standard/suntimes.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //div[@class='story-details']/h1 | ||
2 | date: //span[@class='date-time'] | ||
3 | Author: substring-after(//p[@class='by-line'], 'By ') | ||
4 | |||
5 | strip: //div[@class='videoThumbnails'] | ||
6 | strip: //div[@class='ad-square2-container'] | ||
7 | strip: //div[@class='homeDeliveryContainer5'] | ||
8 | |||
9 | strip: //div[@class='image-description'] | ||
10 | strip: //div[@id='internal-side-bar'] | ||
11 | |||
12 | strip: //span[@class='hide'] | ||
13 | strip: //div[@class='date'] | ||
14 | test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/svd.se.txt b/inc/3rdparty/site_config/standard/svd.se.txt deleted file mode 100755 index bc0a1ca0..00000000 --- a/inc/3rdparty/site_config/standard/svd.se.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | body: //div[@id='article-content'] | ||
2 | author: //div[@id='article']//div[@class='byline']/p | ||
3 | |||
4 | # Ads | ||
5 | strip_id_or_class: articlead | ||
6 | |||
7 | # Sharing | ||
8 | strip_id_or_class: share | ||
9 | |||
10 | prune: no | ||
11 | |||
12 | test_url: http://www.svd.se/nyheter/inrikes/oppositionen-stoppar-skattesankning_8531228.svd | ||
13 | test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd | ||
14 | test_url: http://www.svd.se/?service=rss&type=senastenytt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/svt.se.txt b/inc/3rdparty/site_config/standard/svt.se.txt deleted file mode 100755 index ba35f7d1..00000000 --- a/inc/3rdparty/site_config/standard/svt.se.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //article[@role='main']//h1 | ||
2 | body: //article[@role='main'] | ||
3 | strip: //aside | ||
4 | replace_string(<noscript>): <div> | ||
5 | replace_string(</noscript>): </div> | ||
6 | strip_id_or_class: svtHide-No-Js | ||
7 | strip_id_or_class: aside | ||
8 | strip_id_or_class: Aside | ||
9 | strip_id_or_class: hidden | ||
10 | strip_id_or_class: Share | ||
11 | tidy: no | ||
12 | prune: no | ||
13 | |||
14 | test_url: http://www.svt.se/ug/framtidsdrommar-om-jobb-blev-lackande-gifthal | ||
15 | test_url: http://www.svt.se/nyheter/het-debatt-mellan-borg-och-andersson | ||
16 | test_url: http://www.svt.se/nyheter/regionalt/svtsormland/sj-tag-evakuerades-efter-rokdrama \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt deleted file mode 100755 index 24ba1426..00000000 --- a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: //a[contains(@href, '/sok/?')]/text() | ||
4 | |||
5 | date: //meta[@name='bi3dPubDate']/@content | ||
6 | |||
7 | body: (//div[contains(@class, 'slider_wrapper')])[1] | //div[@id='article_image' or @class='two_column_left'] | ||
8 | strip_id_or_class: story | ||
9 | strip_id_or_class: article_body_ad | ||
10 | strip: //div[@class='leadText saplo:lead']/h5 | ||
11 | |||
12 | replace_string(<br />): <br /><br /> | ||
13 | |||
14 | prune: no | ||
15 | |||
16 | test_url: http://www.sydsvenskan.se/malmo/allt-jag-ager-ligger-pa-botten/ | ||
17 | test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- | ||
18 | test_url: http://www.sydsvenskan.se/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt deleted file mode 100755 index 5bcfb9ef..00000000 --- a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[contains(@class, "post")]/h2 | ||
2 | |||
3 | author: //div[contains(@class, "post")]/p[position()=last()]/text()[1] | ||
4 | |||
5 | date: //div[contains(@class, "post")]/p[1] | ||
6 | |||
7 | body: //div[contains(@class, "post")] | ||
8 | |||
9 | strip: //div[contains(@class, "post")]/h2[1] | ||
10 | strip: //div[contains(@class, "post")]/p[1] | ||
11 | strip: //div[contains(@class, "post")]/p[position()=last()] | ||
12 | test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt deleted file mode 100755 index e058032c..00000000 --- a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body://div[@class='drucken'] | ||
3 | author: substring-after(//span[@class='autor'], 'Von ') | ||
4 | author: //span[@class='autor'] | ||
5 | |||
6 | single_page_link://a[contains(@href, '/drucken/')] | ||
7 | convert_double_br_tags:yes | ||
8 | |||
9 | dissolve://div[@class='vorspann'] | ||
10 | |||
11 | strip://h1 | ||
12 | strip_id_or_class: klassifizierung | ||
13 | strip_id_or_class: source | ||
14 | strip_id_or_class: autor | ||
15 | test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt deleted file mode 100755 index f67637d2..00000000 --- a/inc/3rdparty/site_config/standard/sz.de.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... | ||
2 | |||
3 | single_page_link: //a[ contains( @href, "/2.220/" ) ] | ||
4 | |||
5 | body: //article[@id="sitecontent"]/section[@class="body"] | ||
6 | author: //address[@class="author"] | ||
7 | date: //div[@class="header"]//h1//span[@class="updated"] | ||
8 | wrap_in(small): //div[@class="footer"] | ||
9 | wrap_in(i): //figcaption/h3 | ||
10 | dissolve: //figcaption//h3 | ||
11 | dissolve: //figure/div[@class="body"] | ||
12 | dissolve: //figure/a | ||
13 | |||
14 | strip: //figure[ not( contains(@class, "zoomimage" ) ) ] | ||
15 | strip: //div[@data-onlineonly="true"] | ||
16 | strip: //address[@class="author"] | ||
17 | |||
18 | test_url: http://sz.de/1.1556693 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tabletmag.com.txt b/inc/3rdparty/site_config/standard/tabletmag.com.txt deleted file mode 100755 index 58b1f5bb..00000000 --- a/inc/3rdparty/site_config/standard/tabletmag.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[contains(@class, 'story-text')] | ||
2 | |||
3 | strip_id_or_class: related | ||
4 | |||
5 | test_url: http://www.tabletmag.com/jewish-news-and-politics/181181/mossberg-parallel-states?all=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt deleted file mode 100755 index be76cd05..00000000 --- a/inc/3rdparty/site_config/standard/tagesschau.de.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | title://h1[1] | ||
2 | |||
3 | author: substring-after(//em, 'Von ') | ||
4 | author:string('tagesschau.de') | ||
5 | |||
6 | date:substring-after(//div[@class='standDatum'], 'Stand: ') | ||
7 | |||
8 | body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] | ||
9 | |||
10 | strip://h1[1] | ||
11 | strip: //div[contains(@class, 'directLinks')] | ||
12 | strip: //div[contains(@class, 'zitatBox')] | ||
13 | strip: //div[contains(@class, 'teaserBox metaBlock')] | ||
14 | strip: //*[contains(@class, 'inv')] | ||
15 | strip: //span[@class='imgSubline'] | ||
16 | strip: //*[contains(@class, 'topline')][1] | ||
17 | strip: //div[@id='rightCol'][1] | ||
18 | strip: //div[@id="footer"][1] | ||
19 | strip: //div[@class="fPlayer"] | ||
20 | strip: //div[@id='seitenanfang'] | ||
21 | strip: //div[@class='standDatum'] | ||
22 | strip: //em | ||
23 | test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tagesspiegel.de.txt b/inc/3rdparty/site_config/standard/tagesspiegel.de.txt deleted file mode 100755 index 57e7d3df..00000000 --- a/inc/3rdparty/site_config/standard/tagesspiegel.de.txt +++ /dev/null | |||
@@ -1,60 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | # Should work with "normal" articles as well as with image galleries | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | # Title | ||
7 | title: //h1/span[@class='hcf-headline'] | ||
8 | |||
9 | # Set author | ||
10 | author: //a[@rel='author'] | ||
11 | |||
12 | # Set date | ||
13 | date: //span[@class='date hcf-atlas'] | ||
14 | |||
15 | # Fetch full multipage articles | ||
16 | next_page_link: //a[contains(@class, 'hcf-forward')] | ||
17 | |||
18 | # Content is here | ||
19 | body: //article | ||
20 | body: //div[contains(@class, 'hcf-screen')] | ||
21 | |||
22 | # Remove tracking and ads | ||
23 | strip_id_or_class: hcf-ad | ||
24 | strip_id_or_class: hcf-autoload-ad | ||
25 | strip_id_or_class: hcf-content-ad | ||
26 | |||
27 | # Tidy up before article | ||
28 | strip: //article/h1 | ||
29 | strip_id_or_class: hcf-atlas | ||
30 | strip_id_or_class: hcf-author | ||
31 | strip_id_or_class: date hcf-atlas | ||
32 | strip_id_or_class: date hcf-atlas | ||
33 | |||
34 | # General cleanup | ||
35 | strip: //div[contains(@class, 'hcf-screen')]//h1 | ||
36 | strip: //div[@class='hcf-subpage-titles']//ul | ||
37 | strip_id_or_class: hcf-doctype-media | ||
38 | strip_id_or_class: hcf-inline-gallery | ||
39 | strip_id_or_class: hcf-doctype-video | ||
40 | strip_id_or_class: hcf-links | ||
41 | strip_id_or_class: hcf-mini-navi | ||
42 | strip_id_or_class: hcf-media-control | ||
43 | strip_id_or_class: hcf-hidden | ||
44 | replace_string(<span class="hcf-update">Update</span>): <strong>Update: </strong> | ||
45 | |||
46 | # Fix pictures and captions | ||
47 | replace_string(<a class="hcf-doctype-gallery): <p class="hcf-doctype-gallery | ||
48 | replace_string(<a class="hcf-doctype-enlarge): <p class="hcf-doctype-enlarge | ||
49 | replace_string(<figcaption class="hcf-caption">): <br><small><em> | ||
50 | replace_string(</figcaption>): </em></small> | ||
51 | |||
52 | # Fix image galleries | ||
53 | replace_string(<a class=" ajaxify): <p class="ajaxify | ||
54 | replace_string(<div class="hcf-caption"><div><p>): <small><em> | ||
55 | |||
56 | # Try it yourself | ||
57 | test_url: http://www.tagesspiegel.de/berlin/bezirke/wedding/wedding-jetzt/auf-der-suche-nach-einem-stadtteil-wilder-weiter-wedding/8757156.html | ||
58 | test_url: http://www.tagesspiegel.de/berlin/olympia-in-berlin-der-flughafen-tegel-soll-das-olympische-dorf-werden/10645036.html | ||
59 | test_url: http://www.tagesspiegel.de/mediacenter/fotostrecken/berlin/bildergalerie-kreuzberger-der-woche/9305534.html | ||
60 | |||
diff --git a/inc/3rdparty/site_config/standard/tampabay.com.txt b/inc/3rdparty/site_config/standard/tampabay.com.txt deleted file mode 100755 index 47a6ffab..00000000 --- a/inc/3rdparty/site_config/standard/tampabay.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //span[@class="entry-title"] | ||
2 | author: //*[contains(@class, 'item')]/p/a/text() | ||
3 | date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:') | ||
4 | body: //div[@class="entry-content"] | ||
5 | test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/taptaptap.com.txt b/inc/3rdparty/site_config/standard/taptaptap.com.txt deleted file mode 100755 index e1e79428..00000000 --- a/inc/3rdparty/site_config/standard/taptaptap.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h3[@class="storytitle"] | ||
2 | body: //div[@class="post"] | ||
3 | strip: //div[@class="blurbBox"] | ||
4 | test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tasteofhome.com.txt b/inc/3rdparty/site_config/standard/tasteofhome.com.txt deleted file mode 100755 index f3234f34..00000000 --- a/inc/3rdparty/site_config/standard/tasteofhome.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //div[@id='ctl00_MainContent_ctl00_Div1']//h2 | ||
2 | body: //div[@id='ctl00_MainContent_ctl00_Div1'] | ||
3 | |||
4 | single_page_link: //div[contains(@class, 'recipeHeader')]//a[contains(@href, '/print')] | ||
5 | |||
6 | strip_image_src: tohPrintL.png | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.tasteofhome.com/recipes/Grinch-Punch | ||
11 | test_url: http://www.tasteofhome.com/recipes/lactose-free-chocolate-chip-cookies \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt deleted file mode 100755 index cf853662..00000000 --- a/inc/3rdparty/site_config/standard/taz.de.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | date: //div[@class='secthead'] | ||
2 | body: //div[@class='sectbody'] | ||
3 | title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) | ||
4 | author: //span[@class='author'] | ||
5 | strip: //p[@class='caption'] | ||
6 | strip_id_or_class: rack | ||
7 | |||
8 | test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tbray.org.txt b/inc/3rdparty/site_config/standard/tbray.org.txt deleted file mode 100755 index 558dc9c8..00000000 --- a/inc/3rdparty/site_config/standard/tbray.org.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | body: //div[@id='centercontent'] | ||
2 | strip: //div[@id='rightcontent'] | ||
3 | date: substring-before( //div[@id='cats'], '·') | ||
4 | title: //h1 | ||
5 | test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tcmanila.tk.txt b/inc/3rdparty/site_config/standard/tcmanila.tk.txt deleted file mode 100755 index f6032ec3..00000000 --- a/inc/3rdparty/site_config/standard/tcmanila.tk.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class="post_content"] | ||
3 | author: //span[@class="fn"] | ||
4 | date: //time[@class="updated"] | ||
5 | strip_comments: //yes | ||
6 | footnotes: //yes | ||
7 | test_url: http://tcmanila.tk/post/29189064358/my-2012-roadmap-is-almost-complete-look-at-the \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tcng.org.txt b/inc/3rdparty/site_config/standard/tcng.org.txt deleted file mode 100755 index 4873b50d..00000000 --- a/inc/3rdparty/site_config/standard/tcng.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //div[@id='main-content']/h1 | ||
2 | body: //div[@id='main-content'] | ||
3 | strip: //div[@id='main-content']/h1 | ||
4 | test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt deleted file mode 100755 index da198622..00000000 --- a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h1[@class='storyheadline'] | ||
2 | body: //div[@class='storytext'] | ||
3 | strip: //strong | ||
4 | test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tech.gilt.com.txt b/inc/3rdparty/site_config/standard/tech.gilt.com.txt deleted file mode 100755 index ab564606..00000000 --- a/inc/3rdparty/site_config/standard/tech.gilt.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class="title"]/h1 | ||
2 | title: //div[@class="caption"]/h1 | ||
3 | author: substring-after(//div[@class="metadata"]/div[@class="date"]/a[2], 'by ') | ||
4 | date: //div[@class="metadata"]/div[@class="date"]/a | ||
5 | test_url: http://tech.gilt.com/post/46359463184/26-3-13-todays-noon-outage-and-what-were-doing-to \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt deleted file mode 100755 index 75126f9c..00000000 --- a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title://h1[contains(@id,'artibodyTitle')] | ||
2 | |||
3 | date://span[contains(@id,'pub_date')] | ||
4 | |||
5 | body://div[contains(@id,'artibody')] | ||
6 | |||
7 | strip://div[contains(@class,'otherContent')] | ||
8 | |||
9 | next_page_link://p[@class='page']/a[contains(.,'下一页')] | ||
10 | |||
11 | test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techcrunch.com.txt b/inc/3rdparty/site_config/standard/techcrunch.com.txt deleted file mode 100755 index 1509c46e..00000000 --- a/inc/3rdparty/site_config/standard/techcrunch.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')] | ||
2 | |||
3 | author: //a[@class="name"] | ||
4 | |||
5 | date: //div[@class="post-time"] | ||
6 | |||
7 | title: //h1[@class="headline"] | ||
8 | strip_id_or_class: module-crunchbase | ||
9 | |||
10 | # The following is for the mobile site | ||
11 | body: //div[@id="singlentry"] | ||
12 | author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ') | ||
13 | date: substring-before(//div[@class="single-post-meta-top"],' @') | ||
14 | title: //a[@class="sh2"] | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techdirt.com.txt b/inc/3rdparty/site_config/standard/techdirt.com.txt deleted file mode 100755 index 7db2f95b..00000000 --- a/inc/3rdparty/site_config/standard/techdirt.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@class='story'] | ||
2 | title: //div[@class='story']/h1 | ||
3 | strip: //div[@class='story']/h1 | ||
4 | |||
5 | author: //div[@class='details']/p[contains(., 'by ')]/a | ||
6 | date: //p[@class='storydate'] | ||
7 | |||
8 | strip: //p[a[contains(., 'Leave a Comment')]] | ||
9 | strip_id_or_class: share | ||
10 | strip_id_or_class: maincolumn_head | ||
11 | strip_id_or_class: maincolmod | ||
12 | test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techhive.com.txt b/inc/3rdparty/site_config/standard/techhive.com.txt deleted file mode 100755 index 29720b0b..00000000 --- a/inc/3rdparty/site_config/standard/techhive.com.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | title: //div[@class='articleHead']//h1 | ||
2 | author: //div[@class="author-name"]/a[1] | ||
3 | body: //div[@class="main"] | ||
4 | |||
5 | # remove 'From the Lab' and 'Recent posts' text | ||
6 | strip: //div[@class='blogLabel'] | ||
7 | |||
8 | # remove byline and meta info | ||
9 | strip: //div[@class="article-meta"] | ||
10 | strip: //div[@class="author-info"] | ||
11 | |||
12 | #strip tags and categories | ||
13 | strip: //div[@class="department"] | ||
14 | |||
15 | #strip product cap links | ||
16 | strip: //div[@class="cap-main"] | ||
17 | strip: //div[@id="compare-lede"] | ||
18 | test_url: http://www.techhive.com/article/2010549/up-close-with-blackberry-10.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techmeme.com.txt b/inc/3rdparty/site_config/standard/techmeme.com.txt deleted file mode 100755 index 26eb37b0..00000000 --- a/inc/3rdparty/site_config/standard/techmeme.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | single_page_link_in_feed: //b/a | ||
2 | |||
3 | test_url: http://www.techmeme.com/feed.xml | ||
diff --git a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt deleted file mode 100755 index d871b603..00000000 --- a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h2 | ||
2 | author: //meta[@name="author"]/@content | ||
3 | date: //h3 | ||
4 | body: //div[@class="postBody"] | ||
5 | strip: //h1 | ||
6 | strip: //h2 | ||
7 | strip: //h3 | ||
8 | test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/technologizer.com.txt b/inc/3rdparty/site_config/standard/technologizer.com.txt deleted file mode 100755 index 179bf5a6..00000000 --- a/inc/3rdparty/site_config/standard/technologizer.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | next_page_link: //a[contains(., 'NEXT PAGE')] | ||
2 | # following::node() selects text nodes too whereas following::* selects only elements. | ||
3 | strip: //span[@class='pageo']/following::node() | ||
4 | strip: //span[@class='pageo'] | ||
5 | test_url: http://technologizer.com/2010/03/08/the-secret-origin-of-windows/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/technologyreview.com.txt b/inc/3rdparty/site_config/standard/technologyreview.com.txt deleted file mode 100755 index d405eb18..00000000 --- a/inc/3rdparty/site_config/standard/technologyreview.com.txt +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | title: //header[@class='article-meta']/h1 | ||
2 | title: substring-before(//title, '|') | ||
3 | |||
4 | body: //section[contains(@class, 'body')] | ||
5 | |||
6 | # Author & Date for News and Featured Stories | ||
7 | author: //ul[@class='byline']/li/a | ||
8 | author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on') | ||
9 | date: substring-after(//ul[@class='byline']/li, 'on ') | ||
10 | |||
11 | # Author & Date for "Views" | ||
12 | author: //div[@class='view-byline']/div[@class='meta']/h2[1] | ||
13 | date: //div[@class='view-byline']/div[@class='meta']/h2[2] | ||
14 | |||
15 | next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')] | ||
16 | test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techpinions.com.txt b/inc/3rdparty/site_config/standard/techpinions.com.txt deleted file mode 100755 index 8e1aa96c..00000000 --- a/inc/3rdparty/site_config/standard/techpinions.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[@class="post"] | ||
2 | |||
3 | strip: //div[@class="post-meta"] | ||
4 | strip: //div[@id="socialicons"] | ||
5 | strip: //div[@id="authorbox"] | ||
6 | |||
7 | test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techradar.com.txt b/inc/3rdparty/site_config/standard/techradar.com.txt deleted file mode 100755 index 0a0ca619..00000000 --- a/inc/3rdparty/site_config/standard/techradar.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # Title without news/reviews etc. appended | ||
2 | title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1 | ||
3 | |||
4 | # Remove home link | ||
5 | strip: //div[@id='page_logo']/a | ||
6 | |||
7 | # Remove utilities | ||
8 | strip: //*[(@id = "utilities")] | ||
9 | |||
10 | # Remove comments link | ||
11 | strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny'] | ||
12 | test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/telegraaf.nl.txt b/inc/3rdparty/site_config/standard/telegraaf.nl.txt deleted file mode 100755 index 91b5baf9..00000000 --- a/inc/3rdparty/site_config/standard/telegraaf.nl.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='artikelKolom'] | ||
2 | strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper'] | ||
3 | strip: //div[@id='artikeltoolbar'] | ||
4 | strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer'] | ||
5 | strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget'] | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt deleted file mode 100755 index 8dcdb42b..00000000 --- a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea'] | ||
2 | strip: //p[@class='comments'] | ||
3 | strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")] | ||
4 | strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links'] | ||
5 | strip: //p[@class='bbpTweet']/span[@class='timestamp'] | ||
6 | strip: //p[@class='bbpTweet']/span[@class='metadata']//img | ||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt deleted file mode 100755 index 596ecc90..00000000 --- a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body://div[@id="print-news"] | ||
2 | strip://a | ||
3 | strip://span[@class="date-line"] | ||
4 | test_url: http://www.thanhnien.com.vn/pages/20121006/hon-90-trieu-usd-nang-cap-do-thi-can-tho.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/the-magazine.org.txt b/inc/3rdparty/site_config/standard/the-magazine.org.txt deleted file mode 100755 index 08864657..00000000 --- a/inc/3rdparty/site_config/standard/the-magazine.org.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://the-magazine.org/1/alone-together-again \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theage.com.au.txt b/inc/3rdparty/site_config/standard/theage.com.au.txt deleted file mode 100755 index ea27c314..00000000 --- a/inc/3rdparty/site_config/standard/theage.com.au.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: //h3[@class='authorName'] | ||
2 | date: //time | ||
3 | body: //div[@class='articleBody'] | ||
4 | strip_id_or_class: adspot | ||
5 | test_url: http://www.theage.com.au/victoria/top-cops-warns-outlaw-bikies-we-have-a-gang-too-20130331-2h1l8.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt deleted file mode 100755 index 38b96672..00000000 --- a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | # Article Metadata | ||
2 | title: //meta[@property="og:title"]/@content | ||
3 | author: substring-after(//h3, 'By ') | ||
4 | date: //h4/a[2] | ||
5 | |||
6 | # Content Pruning | ||
7 | strip: //h4 | ||
8 | strip: //a[@id="print_button"] | ||
9 | strip: //p[@class="excerpt"] | ||
10 | strip: //h3 | ||
11 | strip: //div[@class="caption"] | ||
12 | strip: //center/a/img | ||
13 | test_url: http://theamericanscholar.org/too-big-to-fail-and-too-risky-to-exist/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theappleblog.com.txt b/inc/3rdparty/site_config/standard/theappleblog.com.txt deleted file mode 100755 index caa5ae0c..00000000 --- a/inc/3rdparty/site_config/standard/theappleblog.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | # Remove home link | ||
2 | strip: //div[@id='blog-title']/a | ||
3 | test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt deleted file mode 100755 index 3fc5611b..00000000 --- a/inc/3rdparty/site_config/standard/theatlantic.com.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | title: //div[contains(@class, 'articleHead')]//h1 | ||
2 | |||
3 | body: //div[@class='articleText'] | ||
4 | body: //div[@class='articleContent'] | ||
5 | body: //div[@id='article'] | ||
6 | date: //*[contains(@class, 'date')] | ||
7 | author: //div[@id='profile']//*[@class='authors']//a[1] | ||
8 | author: //*[@class='author']/span | ||
9 | prune: no | ||
10 | |||
11 | strip: //div[@class='moreOnBoxWithImages'] | ||
12 | strip: //p[contains(., 'This article available online at:')] | ||
13 | strip: //p[contains(., 'This article available online at:')]/following::* | ||
14 | strip: //div[@class='earthbox'] | ||
15 | |||
16 | single_page_link: //article//a[contains(@class, 'print')] | ||
17 | |||
18 | native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] | ||
19 | |||
20 | test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ | ||
21 | test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ | ||
22 | test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt deleted file mode 100755 index 880f207d..00000000 --- a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | # To administrator: | ||
2 | # Please replace the hostname with "*.theatlanticcities.com" | ||
3 | |||
4 | # This filter is tested on: | ||
5 | # http://m.theatlanticcities.com/arts-and-lifestyle/2012/04/invisible-borders-define-american-culture/1839/ | ||
6 | # http://www.theatlanticcities.com/housing/2012/11/chinas-holdouts/3981/ | ||
7 | # http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/ | ||
8 | |||
9 | title://h1 | ||
10 | author: //ul[@class='meta']/li/a | ||
11 | date: //ul[@class='meta']/li/following-sibling::li | ||
12 | body://article[@class='post'] | ||
13 | |||
14 | strip://h1 | ||
15 | strip://ul[@class='meta'] | ||
16 | strip://div[@class='newsletter-slug'] | ||
17 | test_url: http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt deleted file mode 100755 index b74442de..00000000 --- a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //meta[@name='og:title']/@content | ||
2 | date: //meta[@name='created']/@content | ||
3 | body: //div[@class="StoryBody" or @class="storyTeaser"] | ||
4 | |||
5 | replace_string(<p></p>): <br /><br /> | ||
6 | |||
7 | test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thebrowser.com.txt b/inc/3rdparty/site_config/standard/thebrowser.com.txt deleted file mode 100755 index 807e7dad..00000000 --- a/inc/3rdparty/site_config/standard/thebrowser.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h2[contains(@class, 'page-title')] | ||
2 | body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content'] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | strip: //div[contains(@class, 'node-book')]//a[@class='button'] | ||
7 | |||
8 | single_page_link: //a[@class='tool-print'] | ||
9 | |||
10 | test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thecarton.net.txt b/inc/3rdparty/site_config/standard/thecarton.net.txt deleted file mode 100755 index 13fa35a0..00000000 --- a/inc/3rdparty/site_config/standard/thecarton.net.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: substring-before(//title, ' – ') | ||
2 | author:string('Shawn') | ||
3 | date: //*/time/@pubdate | ||
4 | |||
5 | |||
6 | strip: //header | ||
7 | strip: //div[@id='prev_next'] | ||
8 | strip: //div[@id='masthead'] | ||
9 | |||
10 | test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt deleted file mode 100755 index e255e6a8..00000000 --- a/inc/3rdparty/site_config/standard/thedaily.com.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | #keep all body text | ||
2 | prune: no | ||
3 | |||
4 | #title, body, metadata | ||
5 | title: //div[@class='story_header']/h1 | ||
6 | body: //div[@id='content'] | ||
7 | author: substring-after(//span[@class='byline'], "by ") | ||
8 | author: substring-after(//span[@class='byline'], "By ") | ||
9 | author: //span[@class='byline'] | ||
10 | date: //span[@class='date'] | ||
11 | |||
12 | #formatting | ||
13 | convert_double_br_tags: yes | ||
14 | dissolve: //div[@class='slides_full']/ul/li | ||
15 | |||
16 | # cleanup | ||
17 | strip: //a[@id='story_note'] | ||
18 | strip: //br | ||
19 | strip: //div[@class='intro'] | ||
20 | strip: //div[@class='share-block'] | ||
21 | strip: //div[@class='sidebar-social'] | ||
22 | strip: //div[@class='top-stories'] | ||
23 | strip: //div[@class='prevnext'] | ||
24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedailybeast.com.txt b/inc/3rdparty/site_config/standard/thedailybeast.com.txt deleted file mode 100755 index f5e938ae..00000000 --- a/inc/3rdparty/site_config/standard/thedailybeast.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //article/div[contains(@class, 'article-body')] | ||
3 | #strip: //header/hgroup/h1 | ||
4 | strip: //footer[@class='storyFooter'] | ||
5 | single_page_link: //li[@class='print']/a | ||
6 | prune: no | ||
7 | test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt deleted file mode 100755 index a83a6cf6..00000000 --- a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # Remove duplicated title | ||
2 | strip: //div[@id='content']/div[1][@class='full_intro']/h2 | ||
3 | |||
4 | # Remove links, ads etc. | ||
5 | strip: //*[(@class= "aside")] | ||
6 | |||
7 | # Remove the date and add it to the date published field in Instapaper | ||
8 | strip: //div[@class="date"] | ||
9 | date: //div[@class="date"] | ||
10 | |||
11 | # There is no byline on The Daily Mash. | ||
12 | |||
13 | convert_double_br_tags: yes | ||
14 | test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt deleted file mode 100755 index 57b3254a..00000000 --- a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'entry-title')] | ||
2 | author: //span[contains(@class, 'author vcard')] | ||
3 | date: //span[@class = 'entry-date'] | ||
4 | body: //div[@class='entry-content'] | ||
5 | strip_id_or_class: bottomcontainerBox | ||
6 | strip_id_or_class: lightsocial_container | ||
7 | test_url: http://thedisneyblog.com/2012/11/17/videopolis-one-woman-disney-musical-beauty-and-the-beast/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt deleted file mode 100755 index a19bae15..00000000 --- a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | # Tested on: | ||
2 | # http://theeuropean-magazine.com/352-dyson-george/353-evolution-and-innovation | ||
3 | # http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt | ||
4 | |||
5 | title://h2[@class='article-title'] | ||
6 | author:substring-before(substring-after(//p[@class='article-meta'], 'by'), '—') | ||
7 | date:substring-after(//p[@class='article-meta'], '—') | ||
8 | body://div[@class='article'] | ||
9 | |||
10 | wrap_in(strong)://p[@class='article-teaser'] | ||
11 | move_into(//div[@class='article-head'])://li/img | ||
12 | |||
13 | strip://h2[@class='article-title'] | ||
14 | strip://p[@class='article-meta'] | ||
15 | strip://div[@class='copyright'] | ||
16 | strip://div[@class='opinions-of-readers'] | ||
17 | test_url: http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt deleted file mode 100755 index e6b5115a..00000000 --- a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='body'] | ||
2 | test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt deleted file mode 100755 index 849ede77..00000000 --- a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | ## ERROR: Removes all images. Please fix, have no idea why (bad HTML?) | ||
2 | |||
3 | title: //h1[@class='featuretitle'] | ||
4 | body: //div[@id='nobordercontentarea'] | ||
5 | |||
6 | # remove Twitter badge | ||
7 | strip: //img[@alt='Follow tgdfweb on Twitter'] | ||
8 | |||
9 | # fix for headers not showing for some reason | ||
10 | wrap_in(h2): //h2[@class='sectionheader'] | ||
11 | dissolve: //h2[@class='sectionheader'] | ||
12 | |||
13 | tidy: yes | ||
14 | test_url: http://thegamedesignforum.com/features/acceleration_flow_1.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt deleted file mode 100755 index da1c84f9..00000000 --- a/inc/3rdparty/site_config/standard/theglobalmail.org.txt +++ /dev/null | |||
@@ -1,41 +0,0 @@ | |||
1 | title: //h1[@id="headline"] | ||
2 | author: //div[contains(@class, "editorial-byline-author")]/a | ||
3 | date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") | ||
4 | |||
5 | # The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed | ||
6 | body: //div[@id="template"] | ||
7 | strip_id_or_class: editorial-byline-pic | ||
8 | strip_id_or_class: editorial-byline | ||
9 | strip_id_or_class: headline | ||
10 | |||
11 | # Include the leadin paragraph in the body text, but remove quotes because they're out of context | ||
12 | dissolve: //div[contains(@id, "leadin")] | ||
13 | strip_id_or_class: pullquote | ||
14 | |||
15 | # Image captions removed because they're confusing in body text | ||
16 | strip_id_or_class: image-caption-content | ||
17 | |||
18 | # Remove header and footer | ||
19 | strip_id_or_class: header | ||
20 | strip_id_or_class: footer | ||
21 | |||
22 | # Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image | ||
23 | strip: /html/body/span[contains(@style, "display: none")] | ||
24 | |||
25 | # Remove search box | ||
26 | strip_id_or_class: searchContainer | ||
27 | strip: //div[contains(@class, "searchInstruction")] | ||
28 | strip: //div[contains(@class, "searchResults")]/h4 | ||
29 | |||
30 | # Remove the 'Letters to the Editor' section | ||
31 | strip_id_or_class: letter-text | ||
32 | strip_id_or_class: letter-from | ||
33 | strip_id_or_class: letter-date | ||
34 | |||
35 | # Remove Like/Tweet links | ||
36 | strip_id_or_class: social-tab | ||
37 | |||
38 | # Remove 'divider' which causes an inexplicable slash to appear in the article body | ||
39 | strip_id_or_class: divider | ||
40 | |||
41 | test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt deleted file mode 100755 index 2473cad2..00000000 --- a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | body: //div[contains(@class, 'entry-content')]//div[contains(@class, 'column-2')] | ||
2 | single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')] | ||
3 | strip_id_or_class: entry-related | ||
4 | strip_id_or_class: entry-sidebar | ||
5 | strip_id_or_class: entry-pagination | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ | ||
10 | test_url: http://www.theglobeandmail.com/report-on-business/industry-news/energy-and-resources/cliffs-natural-resources-looking-to-exit-ontarios-ring-of-fire/article20651617/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt deleted file mode 100755 index 12442b40..00000000 --- a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@id='headline'] | ||
2 | author: substring-after(//section[@class="credits"]/ul/li[1],"Interview by ") | ||
3 | date: //time[@pubdate] | ||
4 | body: //article[@class='interview'] | ||
5 | strip: //article[@class='interview']/footer | ||
6 | test_url: http://thegreatdiscontent.com/jeffrey-zeldman \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt deleted file mode 100755 index 88e2ecf4..00000000 --- a/inc/3rdparty/site_config/standard/theguardian.com.txt +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | title: //div[@id='main-article-info']//h1 | ||
2 | body: //div[@id='article-wrapper'] | ||
3 | date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] | ||
4 | strip: //div[contains(@class, 'email-subscription')] | ||
5 | strip: //div[contains(@class, 'kindleWidget')] | ||
6 | #strip: //a[not(text())] | ||
7 | strip_id_or_class: pocket-btn | ||
8 | author: //li[@class='byline'] | ||
9 | native_ad_clue: //meta[@property="article:tag" and contains(@content, "Partner zone")] | ||
10 | native_ad_clue: //meta[@property="video:tag" and contains(@content, "Partner zone")] | ||
11 | prune: no | ||
12 | tidy: no | ||
13 | |||
14 | test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption | ||
15 | test_contains: The National Security Agency has made repeated attempts to develop | ||
16 | test_contains: The agency did not directly address those questions, instead providing a statement. | ||
17 | |||
18 | test_url: http://www.theguardian.com/world/2013/oct/03/edward-snowden-files-john-lanchester | ||
19 | test_contains: In August, the editor of the Guardian rang me up and asked if I would spend a week in New York | ||
20 | test_contains: As the second most senior judge in the country, Lord Hoffmann, said in 2004 about a previous version of our anti-terrorism laws | ||
21 | |||
22 | test_url: http://www.theguardian.com/commentisfree/2014/jun/15/britishness-search-identity-my-part-in-camerons-odyssey | ||
23 | # Native ad | ||
24 | test_url: http://www.theguardian.com/sustainable-business/2014/jul/18/ben-jerry-turn-ice-cream-into-energy | ||
diff --git a/inc/3rdparty/site_config/standard/theindychannel.com.txt b/inc/3rdparty/site_config/standard/theindychannel.com.txt deleted file mode 100755 index 2cd865bb..00000000 --- a/inc/3rdparty/site_config/standard/theindychannel.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //h1[@class="Headline"] | ||
2 | date: substring-after(//div[@class="posted"], 'EDT ') | ||
3 | body: //div[@class="storyBody"] | ||
4 | |||
5 | strip: //td[@class="AssocContentTD"] | ||
6 | strip: //div[@id="pageTitle"] | ||
7 | strip: //div[@class="posted"] | ||
8 | strip: //div[@class="updated"] | ||
9 | strip: //div[@class="js-kit-disclaimer"] | ||
10 | strip: //table[@class="row3table"] | ||
11 | strip: //div[@class="container2"] | ||
12 | strip: //div[@id="delta"] | ||
13 | test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/themarker.com.txt b/inc/3rdparty/site_config/standard/themarker.com.txt deleted file mode 100755 index 141b1a3b..00000000 --- a/inc/3rdparty/site_config/standard/themarker.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'mainTitle')] | ||
2 | author: //ul[@class='author']//a[@rel='author'] | ||
3 | body: //div[@id='article-box'] | ||
4 | prune: no | ||
5 | tidy: no | ||
6 | strip_id_or_class: head | ||
7 | strip_id_or_class: social-nav | ||
8 | strip_id_or_class: rate | ||
9 | strip_id_or_class: video | ||
10 | |||
11 | test_url: http://www.themarker.com/markerweek/1.2093167 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/themillions.com.txt b/inc/3rdparty/site_config/standard/themillions.com.txt deleted file mode 100755 index 4d46daee..00000000 --- a/inc/3rdparty/site_config/standard/themillions.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: /html/body/div/div[2]/div/div/div/h3 | ||
2 | |||
3 | body: /html/body/div/div[2]/div/div/div/div[2] | ||
4 | |||
5 | strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div | ||
6 | |||
7 | tidy: no | ||
8 | |||
9 | # any way to get rid of this word character garbage? | ||
10 | test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt deleted file mode 100755 index 80aba441..00000000 --- a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: single-review | ||
2 | strip_id_or_class: featured-review | ||
3 | strip_id_or_class: resources | ||
4 | strip_id_or_class: rate-the-book | ||
5 | strip_id_or_class: write-review | ||
6 | |||
7 | test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenation.com.txt b/inc/3rdparty/site_config/standard/thenation.com.txt deleted file mode 100755 index dab17f0b..00000000 --- a/inc/3rdparty/site_config/standard/thenation.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //h2[@property='dc:title'] | ||
2 | #body: //div[@class='print-content'] | ||
3 | body: //div[@id='wysiwyg'] | ||
4 | author: //a[contains(@href, '/authors')] | ||
5 | author: substring-before(//div[@class='print-created'], '|') | ||
6 | date: //span[@class='article-date'] | ||
7 | date: substring-after(//div[@class='print-created'], '|') | ||
8 | prune: no | ||
9 | |||
10 | #single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')] | ||
11 | single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '?page=full')] | ||
12 | |||
13 | test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt deleted file mode 100755 index b7f5f0f0..00000000 --- a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@id="beta-inner"] | ||
2 | title: //h3[@class="entry-header"] | ||
3 | |||
4 | test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt deleted file mode 100755 index dedd989f..00000000 --- a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1[@class='interior-page-title'] | ||
2 | author: //span[@class='author']/a | ||
3 | date: //div[@class='byline']/time | ||
4 | body: //div[@class='rich-text-body'] | ||
5 | |||
6 | strip: //div[@class='byline'] | ||
7 | strip: //div[@class='offscreen-menu'] | ||
8 | test_url: http://thenextgeneration.org/blog/post/rebrand-announce/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenextweb.com.txt b/inc/3rdparty/site_config/standard/thenextweb.com.txt deleted file mode 100755 index 684fe82d..00000000 --- a/inc/3rdparty/site_config/standard/thenextweb.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body: //div[@class= 'article-body'] | ||
2 | author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')] | ||
3 | |||
4 | strip: //div[@class = 'bargo'] | ||
5 | strip: //div[@class = 'tf'] | ||
6 | strip: //div[@class = 'article']/div[@class = 'blue-box'] | ||
7 | strip_id_or_class: respond | ||
8 | |||
9 | tidy: no | ||
10 | next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href | ||
11 | |||
12 | test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt deleted file mode 100755 index c9abda71..00000000 --- a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='fullstory'] | ||
2 | strip: //div[@id='page_leftbar'] | ||
3 | test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theonion.com.txt b/inc/3rdparty/site_config/standard/theonion.com.txt deleted file mode 100755 index 90e8d658..00000000 --- a/inc/3rdparty/site_config/standard/theonion.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h2[@class='title'] | ||
2 | date: substring-before(//p[@class='meta'], '|') | ||
3 | body: //div[@class='story'] | ||
4 | #body: //div[@class='article_body'] | ||
5 | |||
6 | strip: //h2[@class='title'] | ||
7 | strip: //p[@class='meta'] | ||
8 | strip: //div[@class='ga_section'] | ||
9 | strip: //div[@id='recent_slider'] | ||
10 | |||
11 | test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt deleted file mode 100755 index 75583cd3..00000000 --- a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | body: //div[@class='post'] | ||
3 | author: //p[@class='posted-by'] | ||
4 | date: //div[@class='sprite post-date'] | ||
5 | |||
6 | # The body of the post doesn't have it's own div so we have to strip out the metadata | ||
7 | strip: //div[@class='author_avatar'] | ||
8 | strip: //div[@class='sprite post-date'] | ||
9 | strip: //h1[@class='post-title'] | ||
10 | strip: //p[@class='posted-by'] | ||
11 | test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt deleted file mode 100755 index 5d30230d..00000000 --- a/inc/3rdparty/site_config/standard/theregister.co.uk.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # Updated 25-Jan-2014 | ||
2 | single_page_link: //a[contains(@href, '/Print/')] | ||
3 | |||
4 | title: //div[@id="article"]/h2 | ||
5 | author: //p[@class="byline"]/a | ||
6 | date: //p[@class="dateline"]/a[last()] | ||
7 | |||
8 | test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/ | ||
diff --git a/inc/3rdparty/site_config/standard/theroot.com.txt b/inc/3rdparty/site_config/standard/theroot.com.txt deleted file mode 100755 index 1f56316d..00000000 --- a/inc/3rdparty/site_config/standard/theroot.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='node-content'] | ||
2 | strip_id_or_class: pager | ||
3 | test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/therumpus.net.txt b/inc/3rdparty/site_config/standard/therumpus.net.txt deleted file mode 100755 index 84d0e783..00000000 --- a/inc/3rdparty/site_config/standard/therumpus.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: /html/body/div/div[2]/div/div/h1 | ||
2 | |||
3 | body: /html/body/div/div[2]/div/div/div[2] | ||
4 | test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thesiasat.com.txt b/inc/3rdparty/site_config/standard/thesiasat.com.txt deleted file mode 100755 index 68a8bc8e..00000000 --- a/inc/3rdparty/site_config/standard/thesiasat.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt deleted file mode 100755 index dcdf2572..00000000 --- a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h3[@class='post-title']/a[@class='post-title-link'] | ||
2 | body: //div[@class='post-content'] | ||
3 | author: //div[@class='post-meta-under-title']/a | ||
4 | test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt deleted file mode 100755 index ca983281..00000000 --- a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | strip: //*[(@id = "content")]/h2 | ||
2 | strip: //*[(@class = "wp-notable-line")] | ||
3 | test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thespoof.com.txt b/inc/3rdparty/site_config/standard/thespoof.com.txt deleted file mode 100755 index f71cfb6b..00000000 --- a/inc/3rdparty/site_config/standard/thespoof.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'cTitle')] | ||
2 | body: //div[contains(@class, 'KonaBody') or @id='articleimageright'] | ||
3 | author: //meta[@name='Author']/@content | ||
4 | date: //meta[@name='OriginalPublicationDate']/@content | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thestranger.com.txt b/inc/3rdparty/site_config/standard/thestranger.com.txt deleted file mode 100755 index 6fcf4fdf..00000000 --- a/inc/3rdparty/site_config/standard/thestranger.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029 | ||
2 | |||
3 | #other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885 | ||
4 | |||
5 | title: //div[@id='savageColumn_head']/h1 | ||
6 | title: //h1[@class="headlineLarge"] | ||
7 | |||
8 | strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner'] | ||
9 | |||
10 | body: //div[@id='savageColumn'] | ||
11 | body: //div[@id='story_text'] | ||
12 | test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thestreet.com.txt b/inc/3rdparty/site_config/standard/thestreet.com.txt deleted file mode 100755 index 58eabf00..00000000 --- a/inc/3rdparty/site_config/standard/thestreet.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | title: //div[@id='storyHdr']/h1 | ||
2 | title: //div[@id='print']//h2 | ||
3 | body: //div[@class="virtualpage"] | ||
4 | body: //div[@id='print']//div[@id='bd'] | ||
5 | author: //meta[@name="AUTHOR"]/@content | ||
6 | author: (//div[@id='print']//div[@id='bd']/h4)[1] | ||
7 | date: //meta[@name="DATE"]/@content | ||
8 | date: //div[@id='print']//div[@id='dte'] | ||
9 | |||
10 | strip_id_or_class: articleFooter | ||
11 | strip_id_or_class: sidebar | ||
12 | strip_id_or_class: ie6PrintSubhead | ||
13 | strip_id_or_class: subHdr | ||
14 | |||
15 | |||
16 | replace_string(<P/>): </p><p> | ||
17 | |||
18 | prune: no | ||
19 | |||
20 | #TODO: redirects back - perhaps needs referer to work | ||
21 | single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')] | ||
22 | |||
23 | test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html | ||
24 | # multi page | ||
25 | test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt deleted file mode 100755 index 6b3277eb..00000000 --- a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip:/html/body/form/div[2]/div[3]/div/div/div/div/div/div/div/div/div/div[2]/div[3]/div[2]/div/p[2] | ||
2 | test_url: http://thethaovanhoa.vn/151N20110519085606745T129/levante-quyet-giu-caicedo.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt deleted file mode 100755 index 78f8654a..00000000 --- a/inc/3rdparty/site_config/standard/theverge.com.txt +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | author: //p[contains(@class, "byline")]/a[contains(@class, "author")] | ||
2 | |||
3 | date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime | ||
4 | |||
5 | body: //div[contains(@class, 'entry-content')] | ||
6 | # for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video | ||
7 | body: //article | ||
8 | prune: no | ||
9 | #tidy: no | ||
10 | |||
11 | strip: //article/header | ||
12 | strip: //*[@id='sticky-menu'] | ||
13 | strip: //aside | ||
14 | strip: //nav | ||
15 | strip: //img[contains(@class, 'vox-lazy-load')] | ||
16 | # deal with bad parsing | ||
17 | strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')] | ||
18 | strip: //div[contains(@class, 'm-linkset')] | ||
19 | strip: //div[contains(@class, 'm-entry__sidebar')] | ||
20 | strip: //ul[contains(@class, 'm-article__sources')] | ||
21 | strip: //div[contains(@class, 'chorus-emc__content')] | ||
22 | |||
23 | |||
24 | strip_id_or_class: gallery | ||
25 | strip_id_or_class: article-meta | ||
26 | strip_id_or_class: story-navigation | ||
27 | strip_id_or_class: slegend | ||
28 | strip_id_or_class: related-product-meta | ||
29 | strip_id_or_class: comments | ||
30 | strip_id_or_class: ui-jump-list | ||
31 | strip_id_or_class: pullquote | ||
32 | strip_id_or_class: m-ad | ||
33 | strip_id_or_class: social-sharing | ||
34 | strip_id_or_class: m-video-entry__excerpt | ||
35 | strip_id_or_class: hidden | ||
36 | |||
37 | replace_string(<noscript>): <div> | ||
38 | replace_string(</noscript>): </div> | ||
39 | |||
40 | find_string: <script | ||
41 | replace_string: <div style="display:none" | ||
42 | find_string: </script> | ||
43 | replace_string: </div> | ||
44 | |||
45 | strip: //q | ||
46 | |||
47 | strip: //a[contains(@class, 'entry-section-title')] | ||
48 | |||
49 | test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review | ||
50 | test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review | ||
51 | test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware | ||
52 | test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream | ||
53 | test_url: http://www.theverge.com/rss/frontpage | ||
diff --git a/inc/3rdparty/site_config/standard/theweek.com.txt b/inc/3rdparty/site_config/standard/theweek.com.txt deleted file mode 100755 index f98749e2..00000000 --- a/inc/3rdparty/site_config/standard/theweek.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | body: //div[@class="briefingEntry"] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thinkprogress.org.txt b/inc/3rdparty/site_config/standard/thinkprogress.org.txt deleted file mode 100755 index 1eec4e3c..00000000 --- a/inc/3rdparty/site_config/standard/thinkprogress.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //p[@class="byline"]/a | ||
2 | body: //div[@class="post"] | ||
3 | |||
4 | test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thisdaylive.com.txt b/inc/3rdparty/site_config/standard/thisdaylive.com.txt deleted file mode 100755 index 73b3c9ed..00000000 --- a/inc/3rdparty/site_config/standard/thisdaylive.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body'] | ||
2 | test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thisiscolossal.com.txt b/inc/3rdparty/site_config/standard/thisiscolossal.com.txt deleted file mode 100755 index ab16ce18..00000000 --- a/inc/3rdparty/site_config/standard/thisiscolossal.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | # Author: zinnober | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | # Set author | ||
7 | author: //a[contains(@rel, 'author')] | ||
8 | |||
9 | # Content is here | ||
10 | body: //article | ||
11 | |||
12 | # Tidy up before article | ||
13 | strip: //header | ||
14 | |||
15 | # Get rid of doubled images | ||
16 | strip: //img[contains(@class, '-hidden')] | ||
17 | |||
18 | # Tidy up after article | ||
19 | strip_id_or_class: social-list | ||
20 | strip_id_or_class: meta-info | ||
21 | strip: //footer | ||
22 | |||
23 | # Try it yourself | ||
24 | test_url: http://www.thisiscolossal.com/2014/09/chicago-in-the-fog-by-michael-salisbury/ | ||
25 | test_url: http://www.thisiscolossal.com/2014/09/bird-portraits-ruffling-with-personality-by-leila-jeffreys/ | ||
diff --git a/inc/3rdparty/site_config/standard/thisismynext.com.txt b/inc/3rdparty/site_config/standard/thisismynext.com.txt deleted file mode 100755 index 70b53995..00000000 --- a/inc/3rdparty/site_config/standard/thisismynext.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | author: //div[@class='meta clearfix']/a | ||
2 | body: //div[@class='post'] | ||
3 | |||
4 | strip: //div[@class='metaCat'] | ||
5 | strip: //div[@class='post']/h1 | ||
6 | strip: //div[@class='post']/div[@class='meta clearfix'] | ||
7 | strip: //div[@class='post']/div[@class='social-bar clearfix'] | ||
8 | test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tidbits.com.txt b/inc/3rdparty/site_config/standard/tidbits.com.txt deleted file mode 100755 index 1950e58e..00000000 --- a/inc/3rdparty/site_config/standard/tidbits.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | author: //span[@class='fn'] | ||
2 | date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') | ||
3 | test_url: http://tidbits.com/article/12651 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/time.com.txt b/inc/3rdparty/site_config/standard/time.com.txt deleted file mode 100755 index f3f886bc..00000000 --- a/inc/3rdparty/site_config/standard/time.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'article-title')] | ||
2 | author: //article//span[contains(@class, 'byline')] | ||
3 | date: //time[@pubdate]/@datetime | ||
4 | body: //section[contains(@class, 'article-body')] | ||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | strip: //figcaption | ||
9 | strip: //p[contains(., 'MORE:') and ./a] | ||
10 | strip: //aside | ||
11 | |||
12 | test_url: http://time.com/14478/emotions-may-not-be-so-universal-after-all/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt deleted file mode 100755 index af1c23ce..00000000 --- a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class="storytext"] | ||
3 | strip: //div[@id="thelogin"] | ||
4 | strip: //*[@class="hide"] | ||
5 | strip: //div[@id="anchored"] | ||
6 | test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tipb.com.txt b/inc/3rdparty/site_config/standard/tipb.com.txt deleted file mode 100755 index b8474d97..00000000 --- a/inc/3rdparty/site_config/standard/tipb.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | strip_id_or_class: featured-box | ||
4 | strip_id_or_class: postmeta | ||
5 | strip_id_or_class: respond | ||
6 | |||
7 | author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')] | ||
8 | date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ') | ||
9 | test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tnr.com.txt b/inc/3rdparty/site_config/standard/tnr.com.txt deleted file mode 100755 index 199f5d13..00000000 --- a/inc/3rdparty/site_config/standard/tnr.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1 | ||
2 | title: //div[contains(@class, 'article_detail')]//h1 | ||
3 | title: //h1 | ||
4 | |||
5 | body: //div[contains(@class, 'article_detail')] | ||
6 | |||
7 | author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3 | ||
8 | author: div[@class='author']//h3 | ||
9 | strip: //div[contains(@class, 'field-field-book-cover')] | ||
10 | |||
11 | date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '') | ||
12 | |||
13 | prune: no | ||
14 | |||
15 | single_page_link: //a[@class='print-page'] | ||
16 | |||
17 | test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomdispatch.com.txt b/inc/3rdparty/site_config/standard/tomdispatch.com.txt deleted file mode 100755 index 701a2122..00000000 --- a/inc/3rdparty/site_config/standard/tomdispatch.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //div[@id='maincontent']//div[@class='title'] | ||
2 | body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat'] | ||
3 | |||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomshardware.com.txt b/inc/3rdparty/site_config/standard/tomshardware.com.txt deleted file mode 100755 index 2b437574..00000000 --- a/inc/3rdparty/site_config/standard/tomshardware.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | tidy: no | ||
2 | title: //title | ||
3 | author: //a[@itemprop = 'author'] | ||
4 | date: //time[@itemprop = 'datePublished'] | ||
5 | body: //div[@id = 'intelliTXT'] | ||
6 | |||
7 | next_page_link: //li[@class="pagin next"]/a | ||
8 | test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomshardware.de.txt b/inc/3rdparty/site_config/standard/tomshardware.de.txt deleted file mode 100755 index eee57ccf..00000000 --- a/inc/3rdparty/site_config/standard/tomshardware.de.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | body://div[@id="news-content"]/div[@id="intelliTXT"][1] | ||
2 | |||
3 | author://div[@id="header-news-infos"]/a[1] | ||
4 | |||
5 | date: //div[@id="header-news-infos"]/span[1] | ||
6 | |||
7 | title://h1[@id="header-news-title" and @class="hardwareTitle"][1] | ||
8 | |||
9 | strip://div[@id="news-content"]/div[@id="intelliTXT"]/table | ||
10 | |||
11 | footnotes: no | ||
12 | test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt deleted file mode 100755 index bb45d890..00000000 --- a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class='post'] | ||
2 | |||
3 | strip: //div[@class='social'] | ||
4 | strip: //span[@class='next'] | ||
5 | strip: //span[@class='previous'] | ||
6 | test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/towerofthehand.com.txt b/inc/3rdparty/site_config/standard/towerofthehand.com.txt deleted file mode 100755 index a4d87d12..00000000 --- a/inc/3rdparty/site_config/standard/towerofthehand.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //div[@id='headline'] | ||
2 | body: //div[@class='entry_text'] | ||
3 | author: //div[text() = 'Author:']/following-sibling::div/a | ||
4 | date: //div[text() = 'Published:']/following-sibling::div | ||
5 | single_page_link: //a[@href='noscript.html'] | ||
6 | prune: no | ||
7 | |||
8 | test_url: http://towerofthehand.com/blog/2014/08/08-pitch-this-got-spinoff/index.html | ||
9 | test_url: http://towerofthehand.com/blog/2014/07/31-definitions-and-embodiments/index.html | ||
10 | test_url: http://towerofthehand.com/blog/2014/07/03-hero-with-thousand-faces/index.html | ||
diff --git a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt deleted file mode 100755 index 5a386470..00000000 --- a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # Metadata | ||
2 | title: substring-after(//title, 'Coyote Tracks - ') | ||
3 | author: //meta[@name="author"]/@content | ||
4 | date: //div[@class="post_header"]/a | ||
5 | |||
6 | # Content Pruning | ||
7 | strip: //div[@class="column left"] | ||
8 | strip: //div[@class="pages"] | ||
9 | strip: //a[@class="text_title"] | ||
10 | strip: //ol[@class="notes"] | ||
11 | |||
12 | dissolve: //div[@class='column right']/ul | ||
13 | dissolve: //li[@class='post'] | ||
14 | test_url: http://tracks.ranea.org/post/31431060205/the-next-big-uh-slightly-taller-thing \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt deleted file mode 100755 index e7a9c82d..00000000 --- a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: concat(substring-before(//title,':'),': ',//div[@class='Date2']) | ||
2 | test_url: http://trailer.web-view.net/Show/0XC4EFE5D648B716BA2E134BC7CE61B9CC001E04F11E9434438186735DBD637488.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/trailerzone.de.txt b/inc/3rdparty/site_config/standard/trailerzone.de.txt deleted file mode 100755 index 02151a63..00000000 --- a/inc/3rdparty/site_config/standard/trailerzone.de.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='video' or @id='main'] | ||
2 | |||
3 | strip_id_or_class: socialshareprivacy2 | ||
4 | strip_id_or_class: wp_rp_first | ||
5 | |||
6 | find_string: Genre</strong> | ||
7 | replace_string: </strong></p><p><strong>Genre</strong> | ||
8 | |||
9 | test_url: http://www.trailerzone.de/g-i-joe-2-die-abrechnung/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/traningslara.se.txt b/inc/3rdparty/site_config/standard/traningslara.se.txt deleted file mode 100755 index d6cfb6db..00000000 --- a/inc/3rdparty/site_config/standard/traningslara.se.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //div[@class="Post-body"]//span[@class="PostHeader"] | ||
2 | author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"] | ||
3 | date: substring-before(//div[@class="PostHeaderIcons metadata"], '|') | ||
4 | body: //div[@class="Post-body"] | ||
5 | strip_id_or_class: print1 | ||
6 | strip_id_or_class: metadata | ||
7 | strip_id_or_class: authorbox | ||
8 | test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/triblive.com.txt b/inc/3rdparty/site_config/standard/triblive.com.txt deleted file mode 100755 index 663cafe1..00000000 --- a/inc/3rdparty/site_config/standard/triblive.com.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //title | ||
2 | author: //span/a | ||
3 | date: substring-after(//small,'Published:') | ||
4 | |||
5 | strip: //h1[@class='vert_class'] | ||
6 | strip: //h1[@class='headline'] | ||
7 | strip: //img[contains(@src,'logo_triblive.gif')] | ||
8 | |||
9 | #strip: //h6 | ||
10 | #strip_img_src: logo_triblive.gif | ||
11 | |||
12 | single_page_link: //a[@class='stprint'] | ||
13 | test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/truthdig.com.txt b/inc/3rdparty/site_config/standard/truthdig.com.txt deleted file mode 100755 index 9e0663b0..00000000 --- a/inc/3rdparty/site_config/standard/truthdig.com.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[@class='printbody']/h1 | ||
2 | body: //div[@class='printbody'] | ||
3 | prune: no | ||
4 | |||
5 | strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/'] | ||
6 | strip: //table[@class='footer'] | ||
7 | strip: //h6[contains(., 'http://')] | ||
8 | |||
9 | single_page_link: //a[contains(@href, '/print/')] | ||
10 | |||
11 | test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ | ||
12 | test_url: http://www.truthdig.com/dig/item/the_death_of_truth_20130505/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tthfanfic.org.txt b/inc/3rdparty/site_config/standard/tthfanfic.org.txt deleted file mode 100755 index 63537c10..00000000 --- a/inc/3rdparty/site_config/standard/tthfanfic.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h2 | ||
2 | author: //a[starts-with(@href, '/AuthorStories')] | ||
3 | body: //div[@id='storyinnerbody'] | ||
4 | test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tthor.com.txt b/inc/3rdparty/site_config/standard/tthor.com.txt deleted file mode 100755 index 902fcd13..00000000 --- a/inc/3rdparty/site_config/standard/tthor.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | prune: no | ||
2 | test_url: http://www.tthor.com/06/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tuaw.com.txt b/inc/3rdparty/site_config/standard/tuaw.com.txt deleted file mode 100755 index 2af00c27..00000000 --- a/inc/3rdparty/site_config/standard/tuaw.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='posttitle'] | ||
2 | author: //span[@class='author']/a | ||
3 | date: //span[@class='timestamp'] | ||
4 | body: //div[@class='body'] | ||
5 | |||
6 | test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tuckreview.com.txt b/inc/3rdparty/site_config/standard/tuckreview.com.txt deleted file mode 100755 index 6e18e3da..00000000 --- a/inc/3rdparty/site_config/standard/tuckreview.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | author: //div[@class='display-name'] | ||
3 | date: //div[@class='date'] | ||
4 | body: //div[@class='body'] | ||
5 | footnotes: no | ||
6 | test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tuhdo.github.io.txt b/inc/3rdparty/site_config/standard/tuhdo.github.io.txt deleted file mode 100644 index beb551fd..00000000 --- a/inc/3rdparty/site_config/standard/tuhdo.github.io.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=https%3A%2F%2Ftuhdo.github.io%2Femacs-tutor.html | ||
4 | |||
5 | body: //div[@id='content'] | ||
6 | strip_id_or_class: table-of-contents | ||
7 | test_url: https://tuhdo.github.io/emacs-tutor.html | ||
diff --git a/inc/3rdparty/site_config/standard/tvtropes.org.txt b/inc/3rdparty/site_config/standard/tvtropes.org.txt deleted file mode 100755 index 3cc3a9cf..00000000 --- a/inc/3rdparty/site_config/standard/tvtropes.org.txt +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # Google Custom Search | ||
2 | strip_id_or_class: google_branding_style | ||
3 | |||
4 | # Avoid double title | ||
5 | strip_id_or_class: pagetitle | ||
6 | |||
7 | # external links are labelled | ||
8 | strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif | ||
9 | |||
10 | title: //div[@class="pagetitle"] | ||
11 | body: //div[@id="wikitext"] | ||
12 | |||
13 | # don't get clever. | ||
14 | strip_comments: no | ||
15 | prune: no | ||
16 | |||
17 | # navigation in footer lives inside the wikitext div, annoyingly. | ||
18 | strip_id_or_class: pathholder | ||
19 | |||
20 | test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/twitter.com.txt b/inc/3rdparty/site_config/standard/twitter.com.txt deleted file mode 100755 index 0e5b7487..00000000 --- a/inc/3rdparty/site_config/standard/twitter.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //title | ||
2 | body: (//p[contains(@class, 'js-tweet-text')])[1] | ||
3 | author: (//strong[contains(@class, 'fullname')])[1] | ||
4 | date: //span[contains(@class, 'js-short-timestamp')]/@data-time | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: https://twitter.com/medialens/status/216883678582804480 | ||
10 | test_contains: is all but alone in challenging the tsunami of UK | ||
diff --git a/inc/3rdparty/site_config/standard/uefa.com.txt b/inc/3rdparty/site_config/standard/uefa.com.txt deleted file mode 100755 index 3469be03..00000000 --- a/inc/3rdparty/site_config/standard/uefa.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText'] | ||
2 | strip: //div[contains(@class, 'mpindex')] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt deleted file mode 100755 index cd9c1361..00000000 --- a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | # applies to uk.ds.ign.com, uk.wii.ign.com etc. | ||
2 | # possibly to non-UK versions, but I can’t test that | ||
3 | |||
4 | title: //h1[@class="headline"] | ||
5 | author: //div[@class="hdr-sub byline"]/a | ||
6 | date: //h2[@class="publish-date"]/span | ||
7 | body: //div[@id="main-article-content"] | ||
8 | |||
9 | strip: //ul[@class="lnks-readmore"] | ||
10 | |||
11 | strip: //div[@class="inlineImageCaption"] | ||
12 | # can’t make the images appear, so remove the captions | ||
13 | |||
14 | strip: //div[@style="width:468px"] | ||
15 | # video caption links | ||
16 | |||
17 | convert_double_br_tags: yes | ||
18 | |||
19 | strip_comments: no | ||
20 | # otherwise the ‘Closing Comments’ are removed | ||
21 | |||
22 | # Ratings box could do with some rearranging, but it’s tricky | ||
23 | test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uni-watch.com.txt b/inc/3rdparty/site_config/standard/uni-watch.com.txt deleted file mode 100755 index 4a5ae344..00000000 --- a/inc/3rdparty/site_config/standard/uni-watch.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on') | ||
2 | date: substring-after(//div[@class='post-byline'], ', on') | ||
3 | |||
4 | # for some reason, the following is producing a "no text [48]" error | ||
5 | #title: //div[@class='post-headline'] | ||
6 | |||
7 | # for some reason, the following doesn't appear to isolate just the body copy | ||
8 | body: //div[@class='post-bodycopy'] | ||
9 | |||
10 | # we solve the above issue by stripping out everything else we don't want | ||
11 | # these can probably all be removed if the body: command above worked | ||
12 | strip_id_or_class: reply | ||
13 | strip_id_or_class: left | ||
14 | strip_id_or_class: post-headline | ||
15 | strip_id_or_class: post-byline | ||
16 | strip_id_or_class: footer | ||
17 | test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/unwinnable.com.txt b/inc/3rdparty/site_config/standard/unwinnable.com.txt deleted file mode 100755 index 05ad86a5..00000000 --- a/inc/3rdparty/site_config/standard/unwinnable.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h1[@class='postTitle'] | ||
2 | author: //a[@rel='author'] | ||
3 | date: substring-before(//h4[@class='postAuthor'], '|') | ||
4 | body: //div[@class='postContent'] | ||
5 | |||
6 | strip: //div[@class='simplePullQuote'] | ||
7 | |||
8 | wrap_in(figure): //img | ||
9 | test_url: http://www.unwinnable.com/2013/04/23/gratifying-play/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uppsalafria.se.txt b/inc/3rdparty/site_config/standard/uppsalafria.se.txt deleted file mode 100755 index 79c59ece..00000000 --- a/inc/3rdparty/site_config/standard/uppsalafria.se.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] | ||
2 | author: //article//div[contains(@class, 'field-byline')] | ||
3 | strip_id_or_class: rekommenderade | ||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: annonser | ||
6 | |||
7 | test_url: http://www.uppsalafria.se/artikel/97167 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/urbandictionary.com.txt b/inc/3rdparty/site_config/standard/urbandictionary.com.txt deleted file mode 100755 index 385c95ca..00000000 --- a/inc/3rdparty/site_config/standard/urbandictionary.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //title | ||
2 | body: //table[@id='entries'] | ||
3 | test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass | ||
diff --git a/inc/3rdparty/site_config/standard/usatoday.com.txt b/inc/3rdparty/site_config/standard/usatoday.com.txt deleted file mode 100755 index 710a7b37..00000000 --- a/inc/3rdparty/site_config/standard/usatoday.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | date: //meta[@itemprop="datePublished"]/@content | ||
2 | author: //div[@itemprop="author"] | ||
3 | body: //div[@itemprop='articleBody'] | ||
4 | |||
5 | strip_id_or_class: share-tools | ||
6 | |||
7 | test_url: http://www.usatoday.com/story/news/world/2014/03/18/malaysia-plane-search/6552429/ | ||
8 | test_url: http://rssfeeds.usatoday.com/usatoday-NewsTopStories \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/usccb.org.txt b/inc/3rdparty/site_config/standard/usccb.org.txt deleted file mode 100755 index 30c28823..00000000 --- a/inc/3rdparty/site_config/standard/usccb.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='CS_Element_maincontent'] | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/useit.com.txt b/inc/3rdparty/site_config/standard/useit.com.txt deleted file mode 100755 index b8511c7c..00000000 --- a/inc/3rdparty/site_config/standard/useit.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | date: substring-after(//p[@class='overline']/strong, ',') | ||
4 | body: //div[@class="maintext"] | ||
5 | strip: //p[@class='overline'] | ||
6 | strip: //h1 | ||
7 | tidy: no | ||
8 | test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/usfirst.org.txt b/inc/3rdparty/site_config/standard/usfirst.org.txt deleted file mode 100755 index f02b2d3e..00000000 --- a/inc/3rdparty/site_config/standard/usfirst.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //meta[@property='dc:title']/@content | ||
2 | date: //div[@class='content']//span[@property='dc:date']/@content | ||
3 | body: //div[@property='content:encoded'] | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.usfirst.org/roboticsprograms/frc/Photo-From-Kickoff-Filming \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt deleted file mode 100755 index d37911bc..00000000 --- a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //h1 | ||
2 | author: //*[@class='byline'] | ||
3 | date: substring-after(//*[@class='pubdatetime'], 'Published: ') | ||
4 | body: //*[@class='body-block'] | ||
5 | test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ux.artu.tv.txt b/inc/3rdparty/site_config/standard/ux.artu.tv.txt deleted file mode 100755 index c69f2df9..00000000 --- a/inc/3rdparty/site_config/standard/ux.artu.tv.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | author: ("Arturo Toledo") | ||
2 | title: //div[@class="post"]/h2 | ||
3 | body: //div[@class="entry"] | ||
4 | |||
5 | # Remove Twitter button | ||
6 | strip: //div[@class="entry"]/p[2]/a/img | ||
7 | test_url: http://ux.artu.tv/?p=192 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt deleted file mode 100755 index 3661b06a..00000000 --- a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title:h1 | ||
2 | test_url: http://www.uzivatelsketestovani.cz/wiki/doku.php/skoleni-axure-rp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vanityfair.com.txt b/inc/3rdparty/site_config/standard/vanityfair.com.txt deleted file mode 100755 index f52339cf..00000000 --- a/inc/3rdparty/site_config/standard/vanityfair.com.txt +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')] | ||
3 | date: //div[contains(@class, 'cn_date_time')] | ||
4 | body: //div[contains(@class, 'pageContainers')] | ||
5 | body: //div[@id='main'] | ||
6 | body: //article[@id='items-container'] | ||
7 | #body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container'] | ||
8 | |||
9 | strip_id_or_class: bc | ||
10 | strip_id_or_class: utilities | ||
11 | strip_id_or_class: list-supporting | ||
12 | strip_id_or_class: yrail | ||
13 | strip_id_or_class: urail | ||
14 | |||
15 | prune: no | ||
16 | #tidy: no | ||
17 | |||
18 | strip_id_or_class: super-rubric-section | ||
19 | strip_id_or_class: cn_date_time | ||
20 | strip_id_or_class: cn_contributors | ||
21 | strip_id_or_class: cn_pagination_controls | ||
22 | strip_id_or_class: cn_features_container | ||
23 | strip_id_or_class: global-footer | ||
24 | strip_id_or_class: cn_ecom_placement | ||
25 | strip: //li[@class='blogNavPrev'] | ||
26 | |||
27 | single_page_link: //a[@title='Print this page'] | ||
28 | |||
29 | test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105 | ||
30 | test_contains: nothing can take away from the miracle of Tahrir Square | ||
31 | |||
32 | test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808 | ||
33 | test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 | ||
diff --git a/inc/3rdparty/site_config/standard/varingen.no.txt b/inc/3rdparty/site_config/standard/varingen.no.txt deleted file mode 100755 index c0133c95..00000000 --- a/inc/3rdparty/site_config/standard/varingen.no.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class='ArticleHeadlineDetailedView'] | ||
2 | date: //span[@class='ArticlePublicationDateTimeDetailedView'] | ||
3 | author://span[@class='ArticleBylineDetailedView'] | ||
4 | body: //div[@class='ArticleTextDetailedView'] | ||
5 | test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/varsity.co.uk.txt b/inc/3rdparty/site_config/standard/varsity.co.uk.txt deleted file mode 100755 index dfbf69cf..00000000 --- a/inc/3rdparty/site_config/standard/varsity.co.uk.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | # FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser | ||
2 | |||
3 | strip: //h2 | ||
4 | test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vea.gov.vn.txt b/inc/3rdparty/site_config/standard/vea.gov.vn.txt deleted file mode 100755 index 9c8420ce..00000000 --- a/inc/3rdparty/site_config/standard/vea.gov.vn.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title://div[@class="detail-new-title"] | ||
2 | body://div[@class="innerpad"] | ||
3 | strip://div[@class="ArticleUtility"] | ||
4 | strip://div[@class="commentPost"] | ||
5 | strip://div[@class="comment-box"] | ||
6 | strip://div[@id="TinLienQuan"] | ||
7 | test_url: http://vea.gov.vn/vn/tintuc/tintuchangngay/Pages/T%C4%83ng-c%C6%B0%E1%BB%9Dng-b%E1%BA%A3o-t%E1%BB%93n-%C4%91%E1%BB%99ng-v%E1%BA%ADt-hoang-d%C3%A3-%E1%BB%9F-Vi%E1%BB%87t-Nam.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vedomosti.ru.txt b/inc/3rdparty/site_config/standard/vedomosti.ru.txt deleted file mode 100755 index 265f9fc7..00000000 --- a/inc/3rdparty/site_config/standard/vedomosti.ru.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //td[@class='second_content']/h1 | ||
2 | body: //td[@class='second_content']/div[@class='article_text'] | ||
3 | test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/veggbilder.no.txt b/inc/3rdparty/site_config/standard/veggbilder.no.txt deleted file mode 100755 index 2a44c317..00000000 --- a/inc/3rdparty/site_config/standard/veggbilder.no.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | author: //div[@class="blogginnleggForfatter"] | ||
2 | date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd']) | ||
3 | strip: //div[contains(@id,"bloggDelingslenker")] | ||
4 | strip: //div[contains(@id,"bloggDelingslenker")] | ||
5 | test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vemedio.com.txt b/inc/3rdparty/site_config/standard/vemedio.com.txt deleted file mode 100755 index d22fc5cf..00000000 --- a/inc/3rdparty/site_config/standard/vemedio.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2 | ||
2 | date: substring-before(//small," • Permalink") | ||
3 | author:string('Martin Hering') | ||
4 | |||
5 | Strip: //p/small | ||
6 | test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/venturebeat.com.txt b/inc/3rdparty/site_config/standard/venturebeat.com.txt deleted file mode 100755 index d6321d79..00000000 --- a/inc/3rdparty/site_config/standard/venturebeat.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //div[@class="author-name"] | ||
3 | date: //span[@class="the-time"] | ||
4 | body: //div[@class="entry-content"] | ||
5 | strip: //div[@class="vb-gallery"] | ||
6 | test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/version.php b/inc/3rdparty/site_config/standard/version.php deleted file mode 100644 index 34a87357..00000000 --- a/inc/3rdparty/site_config/standard/version.php +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | <?php return 4; ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt deleted file mode 100644 index eaf01ebd..00000000 --- a/inc/3rdparty/site_config/standard/version.txt +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | 2013-05-12T22:53:07Z \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/version2.dk.txt b/inc/3rdparty/site_config/standard/version2.dk.txt deleted file mode 100755 index 418b83a1..00000000 --- a/inc/3rdparty/site_config/standard/version2.dk.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //article/header/h1 | ||
2 | |||
3 | author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a | ||
4 | date: //article/header/section[@class='byline']/span[@class='published']/span | ||
5 | |||
6 | body: //article/section[@class='body'] | ||
7 | |||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | # This is required, because Tidy chokes on the HTML5 tags... | ||
11 | tidy: no | ||
12 | test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/verybestbaking.com.txt b/inc/3rdparty/site_config/standard/verybestbaking.com.txt deleted file mode 100755 index ad0fec66..00000000 --- a/inc/3rdparty/site_config/standard/verybestbaking.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //title | ||
2 | body: //div[contains(@class, 'printRecipe')] | ||
3 | strip: //div[@class='recipeHeader'] | ||
4 | prune: no | ||
5 | tidy: no | ||
6 | single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')] | ||
7 | test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vg.no.txt b/inc/3rdparty/site_config/standard/vg.no.txt deleted file mode 100755 index bfadb4a7..00000000 --- a/inc/3rdparty/site_config/standard/vg.no.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@id='artikkelspalte'] | ||
2 | strip_id_or_class: 'breadcrumb' | ||
3 | test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/video.forbes.com.txt b/inc/3rdparty/site_config/standard/video.forbes.com.txt deleted file mode 100755 index 5db77463..00000000 --- a/inc/3rdparty/site_config/standard/video.forbes.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: concat("Video: ", //div[@id='currentVideoTitleDivId']) | ||
2 | body: //div[@id='currentVideoDescriptionId'] | ||
3 | author: //meta[@name='author']/@content | ||
4 | |||
5 | replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease | ||
6 | |||
7 | replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease | ||
8 | |||
9 | test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/videogum.com.txt b/inc/3rdparty/site_config/standard/videogum.com.txt deleted file mode 100755 index d93780ca..00000000 --- a/inc/3rdparty/site_config/standard/videogum.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2[@class='posttitle'] | ||
2 | date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by') | ||
3 | date: //span[@class='postdate'] | ||
4 | author: //span[@class='postdate']/a | ||
5 | body: //div[@class='entry line_top'] | ||
6 | test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/villagevoice.com.txt b/inc/3rdparty/site_config/standard/villagevoice.com.txt deleted file mode 100755 index 36e4a2f5..00000000 --- a/inc/3rdparty/site_config/standard/villagevoice.com.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h2[@class='headline'] | ||
2 | |||
3 | body: //div[@class='ContentPrint'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | single_page_link: //a[contains(@href, '/printVersion/')] | ||
8 | |||
9 | test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vimeo.com.txt b/inc/3rdparty/site_config/standard/vimeo.com.txt deleted file mode 100755 index f36c9c57..00000000 --- a/inc/3rdparty/site_config/standard/vimeo.com.txt +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | title: //title | ||
2 | body: //iframe | ||
3 | |||
4 | find_string: <html><iframe | ||
5 | replace_string: <iframe id="video" | ||
6 | |||
7 | find_string: ></iframe></html> | ||
8 | replace_string: ></iframe> | ||
9 | |||
10 | replace_string("): " | ||
11 | |||
12 | single_page_link: //link[@type='text/xml+oembed'] | ||
13 | |||
14 | prune: no | ||
15 | tidy: no | ||
16 | |||
17 | test_url: http://vimeo.com/35941909 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/viply.de.txt b/inc/3rdparty/site_config/standard/viply.de.txt deleted file mode 100755 index e3599c9d..00000000 --- a/inc/3rdparty/site_config/standard/viply.de.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //div[@id='singletext']//h1 | ||
2 | body: //div[contains(@class, 'mypictureborder')] | //div[@id='singletext'] | ||
3 | prune: no | ||
4 | |||
5 | strip_id_or_class: singletostart | ||
6 | strip_id_or_class: navigation | ||
7 | strip_id_or_class: social | ||
8 | strip_id_or_class: single_topwrapper | ||
9 | strip: //a[contains(., 'Nächster Artikel')] | ||
10 | |||
11 | test_url: http://www.viply.de/?p=87973 | ||
12 | test_url: http://www.viply.de/?feed=rss2 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/visir.is.txt b/inc/3rdparty/site_config/standard/visir.is.txt deleted file mode 100755 index 04e09102..00000000 --- a/inc/3rdparty/site_config/standard/visir.is.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # Author's name, when present, has 'skrifar:' ('writes:') appended to it. | ||
2 | # In case of multiple authors, this would be 'skrifa:', hence only 7 characters | ||
3 | # are stripped off. | ||
4 | author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7) | ||
5 | |||
6 | date: //span[@class='date'] | ||
7 | title: //h1 | ||
8 | body: //div[@class='paragraph'] | ||
9 | |||
10 | # Strip out author string when present | ||
11 | strip: //div[@class='paragraph']/div[@class='meta'] | ||
12 | |||
13 | convert_double_br_tags: yes | ||
14 | test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vitispr.com.txt b/inc/3rdparty/site_config/standard/vitispr.com.txt deleted file mode 100755 index f2d11c7c..00000000 --- a/inc/3rdparty/site_config/standard/vitispr.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | strip: //*[(@id = "ja-search")] | ||
2 | body: //*[(@id = "ja-mainbody")] | ||
3 | body: //*[(@id = "content-mass-bottom")] | ||
4 | strip://h3[contains(span,'Related Posts')] | ||
5 | strip://img | ||
6 | test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vivirmexico.com.txt b/inc/3rdparty/site_config/standard/vivirmexico.com.txt deleted file mode 100755 index e6a72700..00000000 --- a/inc/3rdparty/site_config/standard/vivirmexico.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vnexpress.net.txt b/inc/3rdparty/site_config/standard/vnexpress.net.txt deleted file mode 100755 index e5ebc435..00000000 --- a/inc/3rdparty/site_config/standard/vnexpress.net.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table | ||
2 | strip://div[@class="box-item"] | ||
3 | strip://div[@id="ARTICLE_BANNER"] | ||
4 | strip://a | ||
5 | strip://div[@class="tag-parent"] | ||
6 | strip://div[@class="email-print txtr"] | ||
7 | |||
8 | test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt deleted file mode 100755 index b754aeb8..00000000 --- a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class='entrytext'] | ||
3 | test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vworker.com.txt b/inc/3rdparty/site_config/standard/vworker.com.txt deleted file mode 100755 index cfb9ea1c..00000000 --- a/inc/3rdparty/site_config/standard/vworker.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[contains(@class, 'KonaBody')] | ||
2 | |||
3 | test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt deleted file mode 100755 index e92757d7..00000000 --- a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h2[@class="title"] | ||
2 | body: //div[@class="post"] | ||
3 | |||
4 | test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt deleted file mode 100755 index c53eb0dd..00000000 --- a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | title: //div[@id='pr']/h3 | ||
2 | author: //div[@class='dateline']//a[contains(@href, '/author/')] | ||
3 | |||
4 | # print page | ||
5 | body: //div[@id='prbody'] | ||
6 | # standard page | ||
7 | body: //div[@id='pgbody'] | ||
8 | |||
9 | # for multi-page articles | ||
10 | single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')] | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt deleted file mode 100755 index 21f56352..00000000 --- a/inc/3rdparty/site_config/standard/warnerbros.fr.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h3 | ||
2 | body: //div[@class="content_wysiwyg"] | ||
3 | test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt deleted file mode 100755 index 17f45677..00000000 --- a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@class='main']//article | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.washingtoninstitute.org/policy-analysis/view/striking-syria-lessons-from-the-israeli-experience?goback=.gde_3822158_member_273623672 | ||
6 | test_url: http://www.washingtoninstitute.org/rss/11/10 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt deleted file mode 100755 index 8f8902a5..00000000 --- a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title://a[@class = 'headline-article'] | ||
2 | |||
3 | author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ') | ||
4 | date://div[@class = 'article']/span[@class = 'date'] | ||
5 | body://div[@class = 'article'] | ||
6 | single_page_link://a[@class = 'print'] | ||
7 | strip://p[@class = 'author'] | ||
8 | strip://a[@class = 'headline-article'] | ||
9 | strip://span[@class = 'date'] | ||
10 | test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt deleted file mode 100755 index 0aa9f1d8..00000000 --- a/inc/3rdparty/site_config/standard/washingtonpost.com.txt +++ /dev/null | |||
@@ -1,32 +0,0 @@ | |||
1 | # Seems to be redirecting to articles.washingtonpost.com for many users | ||
2 | |||
3 | body: //div[contains(@class, "article_body")] | ||
4 | # print view | ||
5 | body: //div[@id='print_facet']//div[@id='body'] | ||
6 | |||
7 | author://meta[@name='DC.creator']/@content | ||
8 | title://meta[@name='title']/@content | ||
9 | date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title | ||
10 | date://meta[@name="DC.date.issued"]/@content | ||
11 | strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] | ||
12 | strip://div[@id="wp-column six end"] | ||
13 | strip://div[contains(@class,'hidden')] | ||
14 | strip://div[@id='article-side-rail'] | ||
15 | strip://div[@class="module component todays-paper-module curved"] | ||
16 | strip://div[@class="module component live-qa curved img-border"] | ||
17 | strip://div[@class="module component newsletter-signup curved"] | ||
18 | strip://div[@class="module featured-stories component curved img-border"] | ||
19 | |||
20 | strip_id_or_class: carousel | ||
21 | strip_id_or_class: toolbar | ||
22 | strip_id_or_class: module | ||
23 | |||
24 | # Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html | ||
25 | single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html") | ||
26 | |||
27 | # [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html | ||
28 | #single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html") | ||
29 | |||
30 | test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1 | ||
31 | test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html | ||
32 | test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/web-libre.org.txt b/inc/3rdparty/site_config/standard/web-libre.org.txt deleted file mode 100755 index 9ed43a25..00000000 --- a/inc/3rdparty/site_config/standard/web-libre.org.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | body: //div[@id='template_article'] | ||
2 | |||
3 | strip_id_or_class: article_more | ||
4 | strip: //hr | ||
5 | |||
6 | test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt deleted file mode 100755 index 578ba523..00000000 --- a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title://div[@class="post"]/h2 | ||
2 | author://p[@class="postinfo"]/a | ||
3 | date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ') | ||
4 | body://div[@class="contenttext"] | ||
5 | test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt deleted file mode 100755 index 7cfa49d2..00000000 --- a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | title: //h2[@class="pageTitle"] | ||
2 | strip: //div[@class="postfoot"] | ||
3 | strip: //h2[@class="pageTitle"] | ||
4 | strip: //h3[@class="pageTitle"] | ||
5 | body: //div[@class="post"] | ||
6 | author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed') | ||
7 | date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by') | ||
8 | |||
9 | test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt deleted file mode 100755 index cea10147..00000000 --- a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | tidy: no | ||
2 | dissolve: //div[@id="content"]/div/article/header | ||
3 | body: //div[@id="content"]/div/article | ||
4 | title: //div[@id="content"]/div/article/h1 | ||
5 | date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"] | ||
6 | strip: //div[@id="content"]/div/article/h1 | ||
7 | |||
8 | test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/webwereld.nl.txt b/inc/3rdparty/site_config/standard/webwereld.nl.txt deleted file mode 100755 index 40a5aa36..00000000 --- a/inc/3rdparty/site_config/standard/webwereld.nl.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | strip: //*[@class="paginator"] | ||
2 | body: //*[@id="articleText"] | ||
3 | next_page_link: //a[@class="next"] | ||
4 | |||
5 | # No author detection | ||
6 | # No publishing date detection | ||
7 | # No author and intro deduplication over multiple pages | ||
8 | test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/welt.de.txt b/inc/3rdparty/site_config/standard/welt.de.txt deleted file mode 100755 index 42e65e97..00000000 --- a/inc/3rdparty/site_config/standard/welt.de.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | # set body | ||
2 | tidy: no | ||
3 | body: //div[contains(@class, 'articleContent')] | ||
4 | |||
5 | # remove clutter | ||
6 | strip: //div[@class='advertising'] | ||
7 | strip: //div[@class='themenalarm'] | ||
8 | strip: //div[contains(@class, 'inTextTeaser')] | ||
9 | |||
10 | # remove captions | ||
11 | strip: //span[@class='copyRight'] | ||
12 | |||
13 | # remove photo galleries and extras | ||
14 | strip: //div[contains(@class, 'textGallery')] | ||
15 | strip: //div[contains(@class, 'videoGallery')] | ||
16 | strip: //div[contains(@class, 'imageGallery')] | ||
17 | strip: //div[contains(@class, 'openContent')] | ||
18 | |||
19 | # remove comments | ||
20 | strip: //div[@id = 'writeComment'] | ||
21 | |||
22 | test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt deleted file mode 100755 index 3132e98a..00000000 --- a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: substring-before(//title, '«') | ||
2 | |||
3 | body: //div[@class='entry'] | ||
4 | strip: //div[@class='sharing_label'] | ||
5 | strip: //div[@class='snap_nopreview sharing robots-nocontent'] | ||
6 | test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt deleted file mode 100755 index a88a02c9..00000000 --- a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | autodetect_next_page: no | ||
2 | test_url: http://what-if.xkcd.com/1/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt deleted file mode 100755 index 100a8c88..00000000 --- a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | strip: //div[@class="navigation"] | ||
2 | strip: //div[@id="sidebar"] | ||
3 | strip: //div[@id="post-extra-content"] | ||
4 | strip: //div[@id="footer"] | ||
5 | strip: //div[contains(@class, "sharing")] | ||
6 | |||
7 | test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wheelyric.com.txt b/inc/3rdparty/site_config/standard/wheelyric.com.txt deleted file mode 100755 index b9eeaa0c..00000000 --- a/inc/3rdparty/site_config/standard/wheelyric.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | body://div[contains(@class,'oAndtLyrics')] | ||
2 | strip://div[contains(@class,'info')] | ||
3 | strip://div[contains(@id,'romanization')] | ||
4 | strip://div[contains(@id,'youtube')] | ||
5 | strip://div[contains(@id,'romanizationSelector')] | ||
6 | strip://div[contains(@id,'langSelectWrap')] | ||
7 | strip://div[contains(@id,'requestTranslationWrap')] | ||
8 | strip://div[contains(@id,'viewMore')] | ||
9 | strip://div[contains(@class,'lyricsListInMainContent')] | ||
10 | strip://div[contains(@class,'descIpNoti')] | ||
11 | test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt deleted file mode 100755 index b80fe5d1..00000000 --- a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='content'] | ||
3 | strip_id_or_class: editsection | ||
4 | strip_id_or_class: toc | ||
5 | strip: //div[@id='siteNotice'] | ||
6 | strip: //div[@id='content']//table[last()] | ||
7 | prune: no | ||
8 | test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt deleted file mode 100755 index e9233998..00000000 --- a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='content'] | ||
3 | strip_id_or_class: editsection | ||
4 | strip_id_or_class: toc | ||
5 | strip: //div[@id='siteNotice'] | ||
6 | strip: //div[@id='content']//table[last()] | ||
7 | prune: no | ||
8 | test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wikihow.com.txt b/inc/3rdparty/site_config/standard/wikihow.com.txt deleted file mode 100755 index fe95d3f9..00000000 --- a/inc/3rdparty/site_config/standard/wikihow.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | # ...&printable=yes | ||
2 | body: //div[@id='bodycontents'] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | strip_id_or_class: gatEditSection | ||
6 | strip_id_or_class: relatedwikihows | ||
7 | #strip: //div[contains(@class, 'step_num')] | ||
8 | |||
9 | replace_string(<script ): <div style="display: none" | ||
10 | replace_string(</script>): </div> | ||
11 | |||
12 | single_page_link: //a[@id='gatPrintView'] | ||
13 | single_page_link: concat(//link[@rel='canonical']/@href, '?printable=yes') | ||
14 | |||
15 | test_url: http://www.wikihow.com/Start-Your-Own-Country \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wikitravel.org.txt b/inc/3rdparty/site_config/standard/wikitravel.org.txt deleted file mode 100755 index 1f32a372..00000000 --- a/inc/3rdparty/site_config/standard/wikitravel.org.txt +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # copied from .wikipedia.org.txt | ||
2 | title: //h1[@id='firstHeading' or @class='firstHeading'] | ||
3 | body: //div[@id = 'bodyContent'] | ||
4 | strip_id_or_class: editsection | ||
5 | #strip_id_or_class: toc | ||
6 | strip_id_or_class: vertical-navbox | ||
7 | strip: //table[@id='toc'] | //div[@id='p-toc'] | ||
8 | strip: //div[@id='catlinks' or @id='contentSub'] | ||
9 | strip: //div[@id='jump-to-nav'] | ||
10 | strip: //div[@class='thumbcaption']//div[@class='magnify'] | ||
11 | strip: //table[@class='navbox'] | ||
12 | prune: no | ||
13 | tidy: no | ||
14 | test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/will-self.com.txt b/inc/3rdparty/site_config/standard/will-self.com.txt deleted file mode 100755 index 394f9ca4..00000000 --- a/inc/3rdparty/site_config/standard/will-self.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | strip: //div[@class="widget-area"] | ||
2 | title: //*[@class="entry-title"] | ||
3 | date: //time[@class="entry-date"] | ||
4 | test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/williampfaff.com.txt b/inc/3rdparty/site_config/standard/williampfaff.com.txt deleted file mode 100755 index cefabec0..00000000 --- a/inc/3rdparty/site_config/standard/williampfaff.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: substring-after(//span[@class='itemTitle'], ':') | ||
2 | body: //div[@id='content'] | ||
3 | test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winfuture.de.txt b/inc/3rdparty/site_config/standard/winfuture.de.txt deleted file mode 100755 index dddc6f9e..00000000 --- a/inc/3rdparty/site_config/standard/winfuture.de.txt +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | title: //h1/span | ||
2 | |||
3 | body: //div[@id="news_content"] | ||
4 | |||
5 | author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text() | ||
6 | |||
7 | date: //span[@class='date'] | ||
8 | |||
9 | # Rubrikenbild entfernen | ||
10 | strip: //div[@id="news_content"]/a[1] | ||
11 | |||
12 | test_url: http://winfuture.de/news,69672.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winrumors.com.txt b/inc/3rdparty/site_config/standard/winrumors.com.txt deleted file mode 100755 index f25f9c9e..00000000 --- a/inc/3rdparty/site_config/standard/winrumors.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h1[@class='page-heading'] | ||
2 | author: //small/strong/a | ||
3 | #their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago' instead of providing a useful date/time | ||
4 | date: substring-before(substring-after(//small,'on'),'with') | ||
5 | body: //div[@class='entry'] | ||
6 | test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winsupersite.com.txt b/inc/3rdparty/site_config/standard/winsupersite.com.txt deleted file mode 100755 index f725b67a..00000000 --- a/inc/3rdparty/site_config/standard/winsupersite.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | date: //*[@class='kicker'] | ||
2 | body: //*[@class='KonaBody'] | ||
3 | test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt deleted file mode 100755 index f5a72d14..00000000 --- a/inc/3rdparty/site_config/standard/wired.com.txt +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | title: //meta[@name='Title']/@content | ||
2 | author: //meta[@name='Author']/@content | ||
3 | date: //meta[@name='DisplayDate']/@content | ||
4 | body: //div[@class='entry'] | ||
5 | strip: //p[contains(., 'Pages:') and contains(., 'View All')] | ||
6 | strip: //p[@class='caption'] | ||
7 | strip: //div[@class='desc' or @class='slide' or @id='slide-info'] | ||
8 | |||
9 | strip_id_or_class: pullquote | ||
10 | strip_id_or_class: left_rail | ||
11 | strip_id_or_class: related-container | ||
12 | strip_id_or_class: radvert-caption-wrap | ||
13 | |||
14 | # Remove gallery? | ||
15 | strip_id_or_class: wpgallery | ||
16 | |||
17 | #strip: //text()[contains(., 'nextpage')] | ||
18 | |||
19 | prune: no | ||
20 | |||
21 | single_page_link: //a[.='View All' and contains(@href, '/all/')] | ||
22 | |||
23 | test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ | ||
24 | test_url: http://www.wired.com/wiredenterprise/2013/09/docker/ | ||
25 | test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/ | ||
diff --git a/inc/3rdparty/site_config/standard/wmnf.org.txt b/inc/3rdparty/site_config/standard/wmnf.org.txt deleted file mode 100755 index 1d403a91..00000000 --- a/inc/3rdparty/site_config/standard/wmnf.org.txt +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | title: //div[@class="bodyText"]/h1/text() | ||
2 | body: //div[@class="bodyText"] | ||
3 | |||
4 | # author and date are separated by only a newline | ||
5 | # can't figure out how to tokenize that yet | ||
6 | author: //div[@class="bodyText"]/span[@class="info"]/text() | ||
7 | date: //div[@class="bodyText"]/span[@class="info"]/text() | ||
8 | |||
9 | # strip metadata from body text | ||
10 | strip: //div[@class="bodyText"]/h1/text() | ||
11 | strip: //div[@class="bodyText"]/span[@class="info"] | ||
12 | strip: //div[@class="bodyText"]/span[@class="info"] | ||
13 | test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt deleted file mode 100755 index 70168fbe..00000000 --- a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | date://*[@class="entry-date"] | ||
2 | author://*[@class="author vcard"] | ||
3 | strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"] | ||
4 | test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wn.de.txt b/inc/3rdparty/site_config/standard/wn.de.txt deleted file mode 100755 index ef18c8a5..00000000 --- a/inc/3rdparty/site_config/standard/wn.de.txt +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | author: //div[@id='main']//div[@class='col right']//div[contains(@class, 'attribute-author')] | ||
2 | body: //div[@id='main']//div[@class='col right'] | ||
3 | strip_id_or_class: boxes | ||
4 | strip_id_or_class: lazy | ||
5 | strip_id_or_class: comment_box | ||
6 | strip_id_or_class: fb_comments | ||
7 | |||
8 | find_string: <noscript> | ||
9 | replace_string: <div> | ||
10 | find_string: </noscript> | ||
11 | replace_string: </div> | ||
12 | |||
13 | prune: no | ||
14 | tidy: no | ||
15 | |||
16 | test_url: http://www.wn.de/Muenster/Kultur/1742956-Wilm-Weppelmann-verlaesst-die-Einsiedelei-Und-dann-ab-unter-die-Dusche | ||
17 | # feed | ||
18 | test_url: http://www.wn.de/rss/feed/wn_muenster \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wordyard.com.txt b/inc/3rdparty/site_config/standard/wordyard.com.txt deleted file mode 100644 index d8c753da..00000000 --- a/inc/3rdparty/site_config/standard/wordyard.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.wordyard.com%2F2014%2F09%2F26%2Fremove-blindfold-before-embarking-to-utopia%2F | ||
4 | |||
5 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] | ||
6 | strip_id_or_class: robots-nocontent | ||
7 | strip_id_or_class: post-revisions | ||
8 | test_url: http://www.wordyard.com/2014/09/26/remove-blindfold-before-embarking-to-utopia/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/worldpoultry.net.txt b/inc/3rdparty/site_config/standard/worldpoultry.net.txt deleted file mode 100755 index b88f9279..00000000 --- a/inc/3rdparty/site_config/standard/worldpoultry.net.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title: //div[@class="content article"]/h1 | ||
2 | date: substring-after(//*[@class='date'], '//') | ||
3 | body: //*[@class='article-content'] | ||
4 | strip: //*[@id='nomodal'] | ||
5 | test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/worldwidewords.org.txt b/inc/3rdparty/site_config/standard/worldwidewords.org.txt deleted file mode 100755 index 4682e0d3..00000000 --- a/inc/3rdparty/site_config/standard/worldwidewords.org.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //p[@id='content'] | ||
2 | |||
3 | body: //div[@class='contentblock'] | ||
4 | test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt deleted file mode 100755 index 44add9c9..00000000 --- a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | title: //h2[@class="posttitle"] | ||
2 | body: //div[@class="post"] | ||
3 | strip: //h2[@class="posttitle"] | ||
4 | strip: //p[@class="filed-under"] | ||
5 | convert_double_br_tags: yes | ||
6 | test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wpmayor.com.txt b/inc/3rdparty/site_config/standard/wpmayor.com.txt deleted file mode 100755 index bb4fffc7..00000000 --- a/inc/3rdparty/site_config/standard/wpmayor.com.txt +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | body: //div[@id='nrelate_flyout_placeholder'] | ||
2 | |||
3 | strip_id_or_class: share | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/ | ||
8 | test_url: http://www.wpmayor.com/feed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wtatennis.com.txt b/inc/3rdparty/site_config/standard/wtatennis.com.txt deleted file mode 100755 index 1000ab26..00000000 --- a/inc/3rdparty/site_config/standard/wtatennis.com.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[contains(@class, 'header-2')] | ||
2 | body: //article//*[contains(@class, 'teaserText') or contains(@class, 'lastUpdated') or contains(@class, 'image') or contains(@class, 'body')] | ||
3 | strip_id_or_class: articleIndex | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.wtatennis.com/news/article/3190914 | ||
7 | test_url: http://www.wtatennis.com/news/article/3190244 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt deleted file mode 100755 index 97a5c19d..00000000 --- a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | body://div[@id='articleNew'] | ||
2 | strip://div[@id='articleBy'] | ||
3 | strip://div[@id='articleDate'] | ||
4 | strip://td[@class='articleGraphicCredit'] | ||
5 | strip://h1 | ||
6 | strip://div[@id='articleEnd'] | ||
7 | strip://p[@class='tagline'] | ||
8 | strip://div[@class='openBox adslibraryArticle'] | ||
9 | strip_id_or_class:ad-180x150-1 | ||
10 | |||
11 | |||
12 | title: //div[@id="articleNew"]/h1 | ||
13 | author: //div[@id="articleBy"]/p/b | ||
14 | date: substring-before(//div[@id="articleDate"], "-") | ||
15 | test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt deleted file mode 100755 index 71306af2..00000000 --- a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | strip_id_or_class: hidelabel | ||
2 | test_url: http://www3.imperial.ac.uk/newsandeventspggrp/imperialcollege/newssummary/news_14-7-2010-15-53-18 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wyborcza.pl.txt b/inc/3rdparty/site_config/standard/wyborcza.pl.txt deleted file mode 100755 index 638583dc..00000000 --- a/inc/3rdparty/site_config/standard/wyborcza.pl.txt +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | body: //div[@id='article'] | ||
2 | strip: //div[@class='head'] | ||
3 | |||
4 | strip_id_or_class: txt_upl | ||
5 | |||
6 | single_page_link: //div[@id='gazeta_article_tools']//a[contains(@class, 'print')] | ||
7 | |||
8 | test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x | ||
9 | test_url: http://wyborcza.pl/1,75478,14880255,Biskup_Dydycz_o_pedofilii_i_tajemnicy_spowiedzi__Zamiast.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wyctim.com.txt b/inc/3rdparty/site_config/standard/wyctim.com.txt deleted file mode 100755 index bd7ecf2a..00000000 --- a/inc/3rdparty/site_config/standard/wyctim.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | body: //div[@class='article-body'] | ||
2 | title: //h1 | ||
3 | test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wz-newsline.de.txt b/inc/3rdparty/site_config/standard/wz-newsline.de.txt deleted file mode 100755 index 5b2be744..00000000 --- a/inc/3rdparty/site_config/standard/wz-newsline.de.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title://h1 | ||
2 | |||
3 | date://p[@class='articleDate'] | ||
4 | body://div[@class='articleBody wzStandardArticle'] | ||
5 | test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/xfgjls.com.txt b/inc/3rdparty/site_config/standard/xfgjls.com.txt deleted file mode 100755 index 2dc247a0..00000000 --- a/inc/3rdparty/site_config/standard/xfgjls.com.txt +++ /dev/null | |||
@@ -1,11 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.xfgjls.com/magazine/html/?131.html | ||
3 | # http://www.xfgjls.com/magazine/html/?170.html | ||
4 | |||
5 | body://h3/following-sibling::div | ||
6 | title: //h3 | ||
7 | date: substring-before(//h3/following-sibling::div/p, ' ') | ||
8 | author: substring-before(substring-after(//h3/following-sibling::div/p, '作者:'), '来源') | ||
9 | wrap_in(strong)://span[contains(@style, "FONT-WEIGHT: bold")] | ||
10 | dissolve://span[@style="FONT-FAMILY: '宋体'; FONT-SIZE: 10.5pt; FONT-WEIGHT: bold; mso-spacerun: 'yes'"] | ||
11 | test_url: http://www.xfgjls.com/magazine/html/?170.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/xoeb.us.txt b/inc/3rdparty/site_config/standard/xoeb.us.txt deleted file mode 100755 index c09fa4df..00000000 --- a/inc/3rdparty/site_config/standard/xoeb.us.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //span[@class="fn"] | ||
3 | date: //p[@class="meta"] | ||
4 | test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/yated.com.txt b/inc/3rdparty/site_config/standard/yated.com.txt deleted file mode 100755 index 13a3ea64..00000000 --- a/inc/3rdparty/site_config/standard/yated.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | title: //div[@class='pagetitle'] | ||
2 | test_url: http://www.yated.com/content.asp?categoryid=7&contentid=582 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ynet.co.il.txt b/inc/3rdparty/site_config/standard/ynet.co.il.txt deleted file mode 100755 index aa86566a..00000000 --- a/inc/3rdparty/site_config/standard/ynet.co.il.txt +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | body: //span[@id='article_content' or @class='text16g'] | ||
2 | |||
3 | # ads | ||
4 | strip: //div[.//div[contains(@id, 'ads.')]] | ||
5 | # related content heading | ||
6 | strip: //p[contains(., 'עוד בערוץ החדשות של ynet:')] | ||
7 | strip: //p[contains(., 'כותרות אחרונות מהעולם בחדשות ynet:')] | ||
8 | strip: //div[contains(., 'אינציקלופדיית ynet:')] | ||
9 | # related content links | ||
10 | strip: //a[@class='bluelink'] | ||
11 | # strip image bullets | ||
12 | strip_image_src: ynet_manual_bullet.png | ||
13 | |||
14 | prune: no | ||
15 | tidy: no | ||
16 | |||
17 | # prevent JS issues | ||
18 | find_string: <script type='text/javascript'> | ||
19 | replace_string: <div style="display:none;"> | ||
20 | find_string: </script> | ||
21 | replace_string: </div> | ||
22 | |||
23 | test_url: http://www.ynet.co.il/articles/0,7340,L-4354266,00.html | ||
24 | test_url: http://www.ynet.co.il/articles/0,7340,L-4354268,00.html | ||
25 | #feed | ||
26 | test_url: http://www.ynet.co.il/Integration/StoryRss2.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/yostivanich.com.txt b/inc/3rdparty/site_config/standard/yostivanich.com.txt deleted file mode 100755 index 2aeb7e05..00000000 --- a/inc/3rdparty/site_config/standard/yostivanich.com.txt +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | title://div[@class='entry-title'] | ||
2 | body://div[@class='entry-content'] | ||
3 | strip_comments:yes | ||
4 | convert_double_br_tags:yes | ||
5 | test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt deleted file mode 100755 index b46b09e8..00000000 --- a/inc/3rdparty/site_config/standard/yourerie.com.txt +++ /dev/null | |||
@@ -1,2 +0,0 @@ | |||
1 | body: //div[@class="nxFullTextData"] | ||
2 | test_url: http://yourerie.com/fulltext?nxd_id=306552 | ||
diff --git a/inc/3rdparty/site_config/standard/youtube.com.txt b/inc/3rdparty/site_config/standard/youtube.com.txt deleted file mode 100755 index b0d95f1f..00000000 --- a/inc/3rdparty/site_config/standard/youtube.com.txt +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | title: //title | ||
2 | body: //iframe | ||
3 | |||
4 | find_string: <html><iframe | ||
5 | replace_string: <iframe id="video" | ||
6 | |||
7 | find_string: ></iframe></html> | ||
8 | replace_string: ></iframe> | ||
9 | |||
10 | single_page_link: //link[@type='text/xml+oembed'] | ||
11 | |||
12 | prune: no | ||
13 | tidy: no | ||
14 | |||
15 | test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zcommunications.org.txt b/inc/3rdparty/site_config/standard/zcommunications.org.txt deleted file mode 100755 index 4deb49bf..00000000 --- a/inc/3rdparty/site_config/standard/zcommunications.org.txt +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | title: //h1[@id='view_title'] | ||
2 | author: //div[contains(@class, 'content_authors')]//a | ||
3 | body: //div[@id='view_body'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.zcommunications.org/orwellian-language-update-by-edward-s-herman.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zdnet.com.txt b/inc/3rdparty/site_config/standard/zdnet.com.txt deleted file mode 100755 index 939fb0e3..00000000 --- a/inc/3rdparty/site_config/standard/zdnet.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | title: //h1[@class="h s-1"] | ||
2 | author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|') | ||
3 | author: substring-after(//div[@class="bio"]//h3, 'About ') | ||
4 | date: substring-after(//p[@class="meta s-10"], '|') | ||
5 | date: substring-after(//p[@class="meta"], '|') | ||
6 | body: //div[@class="content-1 entry space-1 clear"] | ||
7 | body: //div[@class="storyBody"] | ||
8 | |||
9 | test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920 | ||
10 | test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt deleted file mode 100755 index 9815d478..00000000 --- a/inc/3rdparty/site_config/standard/zeit.de.txt +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | # 2013.10.30 [rezor92] fixed single_page_link | ||
2 | # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions | ||
3 | # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) | ||
4 | # 2011-12-09 [carlo@...] Removed "related articles" block | ||
5 | # 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. | ||
6 | # 2011-08-20 [carlo@...] added author, fixed date | ||
7 | |||
8 | |||
9 | single_page_link: //a[@title='Auf einer Seite'] | ||
10 | tidy: no | ||
11 | |||
12 | title: //title | ||
13 | date: substring-before( //li[@class="date"], " " ) | ||
14 | author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text() | ||
15 | author: substring-after(//li[@class='source first '], 'Quelle: ') | ||
16 | |||
17 | strip_id_or_class: articleheader | ||
18 | strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] | ||
19 | |||
20 | #Removes author and date from the start | ||
21 | strip: //ul[@class="tools"] | ||
22 | #Removes copyright statements - these often show up as a distracting first line of the article | ||
23 | strip: //p[@class="copyright"] | ||
24 | strip: //div[@class="copyright"] | ||
25 | #Removes pagination links at the end | ||
26 | strip: //div[@class="pagination"] | ||
27 | |||
28 | # Fix picture captions | ||
29 | wrap_in(small): //p[@class="caption"]/text() | ||
30 | |||
31 | # Fix sub-headlines | ||
32 | wrap_in(h2): //p/strong | ||
33 | dissolve: //h2/strong | ||
34 | |||
35 | #Sometimes things are embedded in the print version that are not displayed on the web, but would be displayed in the mobilized versions and can even cause problems. These sections are removed here. | ||
36 | strip_id_or_class:"informatives" | ||
37 | strip_id_or_class:"bottom" | ||
38 | strip_id_or_class:"teasermosaic" | ||
39 | strip_id_or_class:"comments" | ||
40 | strip_id_or_class:"articlefooter af" | ||
41 | strip_id_or_class:"relateds" | ||
42 | strip_id_or_class:"pagination" | ||
43 | |||
44 | footnotes: no | ||
45 | test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag | ||
diff --git a/inc/3rdparty/site_config/standard/zerodistraction.com.txt b/inc/3rdparty/site_config/standard/zerodistraction.com.txt deleted file mode 100644 index d3b60c7d..00000000 --- a/inc/3rdparty/site_config/standard/zerodistraction.com.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | author: //span[@class='author']//a | ||
2 | date: //span[@class='date'] | ||
3 | test_url: http://zerodistraction.com/blog/2012/3/11/retina-ipad-that-means-i-am-going-digital-only-for-comic-boo.html | ||
4 | test_url: http://zerodistraction.com/notes/unreasonably-grumpy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zerohedge.com.txt b/inc/3rdparty/site_config/standard/zerohedge.com.txt deleted file mode 100755 index 7e76aee5..00000000 --- a/inc/3rdparty/site_config/standard/zerohedge.com.txt +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | author: //span[@class='submitted']/a | ||
2 | strip: //div[@class='clear-block clr'] | ||
3 | strip: //div[@class='picture'] | ||
4 | strip: //span[@class='submitted'] | ||
5 | strip: //div[@class='breadcrumb'] | ||
6 | strip: //div[@class='fivestar-static-form-item'] | ||
7 | strip: //div[@class='js-links'] | ||
8 | strip: //div[@class='links clear-block clear'] | ||
9 | strip: //div[@class='block block-block'] | ||
10 | test_url: http://www.zerohedge.com/news/bernankes-columbus-voyage-end-monetary-policy-world \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zerokspot.com.txt b/inc/3rdparty/site_config/standard/zerokspot.com.txt deleted file mode 100755 index afa964db..00000000 --- a/inc/3rdparty/site_config/standard/zerokspot.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id="primarycontent"] | ||
3 | test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zhihu.com.txt b/inc/3rdparty/site_config/standard/zhihu.com.txt deleted file mode 100755 index 3c9d8c1a..00000000 --- a/inc/3rdparty/site_config/standard/zhihu.com.txt +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.zhihu.com/question/19587406 | ||
3 | # http://www.zhihu.com/question/20649035 | ||
4 | # http://www.zhihu.com/question/20637942 | ||
5 | |||
6 | author: //h3[@class='zm-item-answer-author-wrap'] | ||
7 | title://h2[@class='zm-item-title'] | ||
8 | date://a[@class='answer-date-link meta-item'] | ||
9 | convert_double_br_tags: yes | ||
10 | |||
11 | wrap_in(blockquote)://div[@class='zm-editable-content'] | ||
12 | wrap_in(blockquote)://sup/text() | ||
13 | dissolve://sup | ||
14 | |||
15 | strip://div[@class='zh-answers-title'] | ||
16 | strip://div[@class='zm-item-vote-info '] | |
17 | strip://div[@class='zm-item-answer-author-info'] | ||
18 | strip://div[@class='zu-blue-info-board zg-r3px'] | ||
19 | test_url: http://www.zhihu.com/question/20637942 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zingtrain.com.txt b/inc/3rdparty/site_config/standard/zingtrain.com.txt deleted file mode 100755 index 188d4dd6..00000000 --- a/inc/3rdparty/site_config/standard/zingtrain.com.txt +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | title: substring-after(id, 'post')/h2 | ||
2 | body://div[@class = 'entry'] | ||
3 | test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file | ||