From b668db242de35f13de0b317ceaa209574458e9c8 Mon Sep 17 00:00:00 2001 From: Robert Ros Date: Thu, 18 Sep 2014 21:33:22 +0200 Subject: Convert the MySQL charset to utf8mb4 to support the full range of unicode characters --- inc/poche/Database.class.php | 8 +++++--- install/index.php | 6 ++++-- install/mysql.sql | 12 ++++++------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/inc/poche/Database.class.php b/inc/poche/Database.class.php index dfd7ae34..7aaf9740 100755 --- a/inc/poche/Database.class.php +++ b/inc/poche/Database.class.php @@ -24,15 +24,17 @@ class Database { switch (STORAGE) { case 'sqlite': // Check if /db is writeable - if ( !is_writable(STORAGE_SQLITE) || !is_writable(dirname(STORAGE_SQLITE))) { + if ( !is_writable(STORAGE_SQLITE) || !is_writable(dirname(STORAGE_SQLITE))) { die('An error occured: "db" directory must be writeable for your web server user!'); } $db_path = 'sqlite:' . STORAGE_SQLITE; $this->handle = new PDO($db_path); break; case 'mysql': - $db_path = 'mysql:host=' . STORAGE_SERVER . ';dbname=' . STORAGE_DB; - $this->handle = new PDO($db_path, STORAGE_USER, STORAGE_PASSWORD); + $db_path = 'mysql:host=' . STORAGE_SERVER . ';dbname=' . STORAGE_DB . ';charset=utf8mb4'; + $this->handle = new PDO($db_path, STORAGE_USER, STORAGE_PASSWORD, array( + PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8mb4', + )); break; case 'postgres': $db_path = 'pgsql:host=' . STORAGE_SERVER . ';dbname=' . STORAGE_DB; diff --git a/install/index.php b/install/index.php index 1ae782a2..2b080c16 100755 --- a/install/index.php +++ b/install/index.php @@ -101,12 +101,14 @@ else if (isset($_POST['install'])) { $content = file_get_contents('inc/poche/config.inc.php'); if ($_POST['db_engine'] == 'mysql') { - $db_path = 'mysql:host=' . $_POST['mysql_server'] . ';dbname=' . $_POST['mysql_database']; + $db_path = 'mysql:host=' . $_POST['mysql_server'] . ';dbname=' . $_POST['mysql_database'] . 
';charset=utf8mb4'; $content = str_replace("define ('STORAGE_SERVER', 'localhost');", "define ('STORAGE_SERVER', '".$_POST['mysql_server']."');", $content); $content = str_replace("define ('STORAGE_DB', 'poche');", "define ('STORAGE_DB', '".$_POST['mysql_database']."');", $content); $content = str_replace("define ('STORAGE_USER', 'poche');", "define ('STORAGE_USER', '".$_POST['mysql_user']."');", $content); $content = str_replace("define ('STORAGE_PASSWORD', 'poche');", "define ('STORAGE_PASSWORD', '".$_POST['mysql_password']."');", $content); - $handle = new PDO($db_path, $_POST['mysql_user'], $_POST['mysql_password']); + $handle = new PDO($db_path, $_POST['mysql_user'], $_POST['mysql_password'], array( + PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8mb4', + )); $sql_structure = file_get_contents('install/mysql.sql'); } diff --git a/install/mysql.sql b/install/mysql.sql index de5640e4..1b65cd35 100644 --- a/install/mysql.sql +++ b/install/mysql.sql @@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS `config` ( `name` varchar(255) NOT NULL, `value` varchar(255) NOT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; CREATE TABLE IF NOT EXISTS `entries` ( `id` int(11) NOT NULL AUTO_INCREMENT, @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS `entries` ( `content` blob NOT NULL, `user_id` int(11) NOT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; CREATE TABLE IF NOT EXISTS `users` ( `id` int(11) NOT NULL AUTO_INCREMENT, @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS `users` ( `name` varchar(255) NOT NULL, `email` varchar(255) NOT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; CREATE TABLE IF NOT EXISTS `users_config` ( `id` int(11) NOT NULL AUTO_INCREMENT, @@ -31,13 +31,13 @@ CREATE TABLE IF NOT EXISTS `users_config` ( `name` varchar(255) NOT NULL, `value` varchar(255) NOT NULL, PRIMARY KEY (`id`) -) 
ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; CREATE TABLE IF NOT EXISTS `tags` ( `id` int(11) NOT NULL AUTO_INCREMENT, `value` varchar(255) NOT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; CREATE TABLE IF NOT EXISTS `tags_entries` ( `id` int(11) NOT NULL AUTO_INCREMENT, @@ -46,4 +46,4 @@ CREATE TABLE IF NOT EXISTS `tags_entries` ( FOREIGN KEY(entry_id) REFERENCES entries(id) ON DELETE CASCADE, FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE, PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; -- cgit v1.2.3 From 2d4cfc58ec324987ad39365cbb3e4eb49df4e426 Mon Sep 17 00:00:00 2001 From: Mariusz Kozakowski <11mariom+wordpress@gmail.com> Date: Wed, 17 Sep 2014 18:44:29 +0200 Subject: Add support for custom http port Now you can use wallabag behind a reverse proxy (e.g. Squid or Varnish) without problems with URLs like wallabag.example.com:8080. --- inc/poche/Tools.class.php | 1 + inc/poche/config.inc.default.php | 2 ++ 2 files changed, 3 insertions(+) diff --git a/inc/poche/Tools.class.php b/inc/poche/Tools.class.php index 93ec3fc6..beb4f30c 100755 --- a/inc/poche/Tools.class.php +++ b/inc/poche/Tools.class.php @@ -51,6 +51,7 @@ final class Tools $serverport = (!isset($_SERVER["SERVER_PORT"]) || $_SERVER["SERVER_PORT"] == '80' + || $_SERVER["SERVER_PORT"] == HTTP_PORT || ($https && $_SERVER["SERVER_PORT"] == '443') || ($https && $_SERVER["SERVER_PORT"]==SSL_PORT) //Custom HTTPS port detection ? '' : ':' . 
$_SERVER["SERVER_PORT"]); diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php index 2a458544..f666f468 100755 --- a/inc/poche/config.inc.default.php +++ b/inc/poche/config.inc.default.php @@ -24,6 +24,8 @@ ################################################################################# # Do not trespass unless you know what you are doing ################################################################################# +// Change this if http is running on nonstandard port - i.e is behind cache proxy +@define ('HTTP_PORT', 80); // Change this if not using the standart port for SSL - i.e you server is behind sslh @define ('SSL_PORT', 443); -- cgit v1.2.3 From ad0eccb4cd7fe4a1c463073e554d56b3398ca63b Mon Sep 17 00:00:00 2001 From: Marmo Date: Sat, 11 Oct 2014 15:22:53 +0200 Subject: update heise.de.txt Multi-page Telepolis-articles (www.heise.de/tp/...) are not fetched correctly atm. My addition to the single_page_link makes it work (tested with http://www.heise.de/tp/artikel/42/42579/1.html). 
--- inc/3rdparty/site_config/standard/heise.de.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt index c51af561..37a4aaf0 100755 --- a/inc/3rdparty/site_config/standard/heise.de.txt +++ b/inc/3rdparty/site_config/standard/heise.de.txt @@ -1,7 +1,9 @@ -single_page_link: //p[@class='news_option']/a +#second part of single_page_link for telepolis-articles (desktop-version of site) +single_page_link: //p[@class='news_option']/a | //a[@id='tp-druckversion'] date: //p[@class='news_datum'] title: //h1 body: //div[@class='meldung_wrapper'] -test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file +test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html +test_url: http://www.heise.de/tp/artikel/42/42579/1.html -- cgit v1.2.3 From 8ce508cab0e4963f24ba9142bab64ab996715ed9 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 12 Oct 2014 10:00:35 +0200 Subject: Create adme.ru.txt Siteconfig --- inc/3rdparty/site_config/standard/adme.ru.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 inc/3rdparty/site_config/standard/adme.ru.txt diff --git a/inc/3rdparty/site_config/standard/adme.ru.txt b/inc/3rdparty/site_config/standard/adme.ru.txt new file mode 100644 index 00000000..b929685d --- /dev/null +++ b/inc/3rdparty/site_config/standard/adme.ru.txt @@ -0,0 +1,6 @@ +# Generated by FiveFilters.org's web-based selection tool +# Place this file inside your site_config/custom/ folder +# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.adme.ru%2Ftvorchestvo-hudozhniki%2Fprostoj-kak-5-kopeek-hudozhnik-557405%2F + +body: //article[contains(concat(' ',normalize-space(@class),' '),' article ')] +test_url: http://www.adme.ru/tvorchestvo-hudozhniki/prostoj-kak-5-kopeek-hudozhnik-557405/ -- cgit 
v1.2.3 From b9fa7d2c9cbb0adc80fe2971df3488f9e325d7b7 Mon Sep 17 00:00:00 2001 From: tcit Date: Sun, 12 Oct 2014 10:24:07 +0200 Subject: fix z-index-menu mobile view bug #834 --- themes/baggy/css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/themes/baggy/css/main.css b/themes/baggy/css/main.css index b775a291..52ba50f2 100755 --- a/themes/baggy/css/main.css +++ b/themes/baggy/css/main.css @@ -180,7 +180,7 @@ h2:after { padding-top: 9.5em; height: 100%; box-shadow:inset -4px 0 20px rgba(0,0,0,0.6); - z-index: 10; + z-index: 15; } #main { -- cgit v1.2.3 From 48fb171d7a64dbd1036f9e17cbf4c14304483817 Mon Sep 17 00:00:00 2001 From: tcit Date: Wed, 15 Oct 2014 16:47:38 +0200 Subject: fix for #830 --- themes/default/js/popupForm.js | 8 ++++++++ themes/default/js/saveLink.js | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/themes/default/js/popupForm.js b/themes/default/js/popupForm.js index a32e6e70..eb6d1ae2 100644 --- a/themes/default/js/popupForm.js +++ b/themes/default/js/popupForm.js @@ -10,6 +10,14 @@ $(document).ready(function() { $("#search").click(function(){ closeSearch(); + // if other popup is already shown + if ($("#bagit-form").length != 0) { + $("#bagit").removeClass("active-current"); + $('#content').removeClass("opacity03"); + $("#bagit").removeClass("current"); + $("#bagit-arrow").removeClass("arrow-down"); + $("#bagit-form").hide(); + } $('#searchfield').focus(); }); diff --git a/themes/default/js/saveLink.js b/themes/default/js/saveLink.js index 6dbce97e..b52b8a2c 100755 --- a/themes/default/js/saveLink.js +++ b/themes/default/js/saveLink.js @@ -13,7 +13,7 @@ $.fn.ready(function() { $bagit.toggleClass("active-current"); - //only if bagiti link is not presented on page + //only if bag-it link is not presented on page if ( $bagit.length === 0 ) { if ( event !== 'undefined' && event ) { $bagitForm.css( {position:"absolute", top:event.pageY, left:event.pageX-200}); @@ -23,6 +23,11 @@ 
$.fn.ready(function() { } } + if ($("#search-form").length != 0) { + $("#search").removeClass("current"); + $("#search-arrow").removeClass("arrow-down"); + $("#search-form").hide(); + } $bagitForm.toggle(); $('#content').toggleClass("opacity03"); if (url !== 'undefined' && url) { -- cgit v1.2.3 From 750d904a16465bb01eac64e87aba0b27c6fb7d12 Mon Sep 17 00:00:00 2001 From: tcit Date: Fri, 17 Oct 2014 21:08:08 +0200 Subject: fix translation issues --- locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo | Bin 12987 -> 14084 bytes locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po | 30 ++++++++++++++++++++++++++++ locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo | Bin 16505 -> 17607 bytes locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po | 30 ++++++++++++++++++++++++++++ themes/baggy/home.twig | 4 ++-- 5 files changed, 62 insertions(+), 2 deletions(-) diff --git a/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo b/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo index a3c98497..b8132fb5 100644 Binary files a/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo and b/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo differ diff --git a/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po b/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po index c589866c..32e96d07 100644 --- a/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po +++ b/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po @@ -571,6 +571,36 @@ msgstr "" msgid "Enter your search here" msgstr "Enter your search here" +msgid "Apply the tag %search_term% to this search" +msgstr "Apply the tag %search_term% to this search" + +# ebook +msgid "Fancy an E-Book ?" +msgstr "Fancy an E-Book ?" + +msgid "" +"Click on this " +"link to get all your articles in one ebook (ePub 3 format)." +msgstr "" +"Click on this " +"link to get all your articles in one ebook (ePub 3 format)." + +msgid "" +"This can take a while and can even fail if you have too many " +"articles, depending on your server configuration." 
+msgstr "" +"This can take a while and can even fail if you have too many " +"articles, depending on your server configuration." + +msgid "Download the articles from this tag in an epub" +msgstr "Download the articles from this tag in an epub" + +msgid "Download the articles from this search in an epub" +msgstr "Download the articles from this search in an epub" + +msgid "Download the articles from this category in an epub" +msgstr "Download the articles from this category in an epub" + #~ msgid "poche it!" #~ msgstr "poche it!" diff --git a/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo b/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo index f4a28e72..b2d8daae 100644 Binary files a/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo and b/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo differ diff --git a/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po b/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po index 0912b872..df433636 100644 --- a/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po +++ b/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po @@ -655,3 +655,33 @@ msgstr "Cache effacé." msgid "Oops, it seems you don't have PHP 5." msgstr "Oups, vous ne semblez pas avoir PHP 5." + +msgid "Apply the tag %search_term% to this search" +msgstr "Appliquer le tag %search_term% à ces résultats" + +# ebook +msgid "Fancy an E-Book ?" +msgstr "Envie d'un E-Book ?" + +msgid "" +"Click on this " +"link to get all your articles in one ebook (ePub 3 format)." +msgstr "" +"Cliquez sur ce " +"lien pour obtenir tous vos articles (format ePub 3)." + +msgid "" +"This can take a while and can even fail if you have too many " +"articles, depending on your server configuration." +msgstr "" +"Ceci peut prendre un moment et même échouer si vous avez trop " +"d'articles, selon la configuration de votre serveur." 
+ +msgid "Download the articles from this tag in an epub" +msgstr "Télécharger les articles de ce tag dans un epub" + +msgid "Download the articles from this search in an epub" +msgstr "Télécharger les articles de cette recherche dans un epub" + +msgid "Download the articles from this category in an epub" +msgstr "Télécharger les articles de cette catégorie dans un epub" diff --git a/themes/baggy/home.twig b/themes/baggy/home.twig index e788b588..157615ae 100755 --- a/themes/baggy/home.twig +++ b/themes/baggy/home.twig @@ -57,9 +57,9 @@ {% endfor %} {{ block('pager') }} - {% if view == 'home' %}{% if nb_results > 1 %}{{ "Mark all the entries as read" }}{% endif %}{% endif %} + {% if view == 'home' %}{% if nb_results > 1 %}{% trans "Mark all the entries as read" %}{% endif %}{% endif %} - {% if search_term is defined %}{% trans %} Apply the tag {{ search_term }} to this search {% endtrans %}{% endif %} + {% if search_term is defined %}{% trans %}Apply the tag {{ search_term }} to this search{% endtrans %}{% endif %} {% if tag %}{% trans "Download the articles from this tag in an epub" %} {% elseif search_term is defined %}{% trans "Download the articles from this search in an epub" %} -- cgit v1.2.3 From f56791e6c482f95d1a5aef332ba69fc81f0666cb Mon Sep 17 00:00:00 2001 From: tcit Date: Sun, 19 Oct 2014 11:12:25 +0200 Subject: fix #873 --- themes/baggy/css/main.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/themes/baggy/css/main.css b/themes/baggy/css/main.css index 52ba50f2..7adde2fa 100755 --- a/themes/baggy/css/main.css +++ b/themes/baggy/css/main.css @@ -777,6 +777,10 @@ margin-top:1em; color: #FFF; } +.icon-check.archive:before { + color: #FFF; +} + /* ========================================================================== 4 = Messages ========================================================================== */ -- cgit v1.2.3 From 76b1e0babee9137974f7ce1677259b62c3b7fb4d Mon Sep 17 00:00:00 2001 From: Marmo Date: Tue, 21 Oct 2014 19:33:40 +0200 
Subject: update zeit.de.txt for removal of inline ads --- inc/3rdparty/site_config/standard/zeit.de.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt index 9815d478..8c9c1718 100755 --- a/inc/3rdparty/site_config/standard/zeit.de.txt +++ b/inc/3rdparty/site_config/standard/zeit.de.txt @@ -1,3 +1,4 @@ +# 2014-10-21 [Marmo] added stripping of inline ads and appropriate test_url # 2013.10.30 [rezor92] fixed single_page_link # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) @@ -16,6 +17,8 @@ author: substring-after(//li[@class='source first '], 'Quelle: ') strip_id_or_class: articleheader strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] +#Remove inline ads +strip: //div[@class="innerad"] #Removes author and date from the start strip: //ul[@class="tools"] @@ -43,3 +46,4 @@ strip_id_or_class:"pagination" footnotes: no test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag +test_url: http://www.zeit.de/wissen/2014-10/ebola-nigeria-who -- cgit v1.2.3 From 606bea72e1bce1b93f29c02e89b5e82e15b2f1f9 Mon Sep 17 00:00:00 2001 From: tcit Date: Wed, 22 Oct 2014 15:10:38 +0200 Subject: fix #882 --- inc/poche/Routing.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/poche/Routing.class.php b/inc/poche/Routing.class.php index 0b373058..6ae93d21 100755 --- a/inc/poche/Routing.class.php +++ b/inc/poche/Routing.class.php @@ -119,7 +119,7 @@ class Routing } elseif 
(isset($_GET['deluser'])) { $this->wallabag->deleteUser($_POST['password4deletinguser']); } elseif (isset($_GET['epub'])) { - $epub = new WallabagEpub($this->wallabag, $_GET['method'], $_GET['id'], $_GET['value']); + $epub = new WallabagEpub($this->wallabag, $_GET['method'], $_GET['value']); $epub->run(); } elseif (isset($_GET['import'])) { $import = $this->wallabag->import(); -- cgit v1.2.3 From 90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Mon, 27 Oct 2014 06:46:13 +0100 Subject: updated site_config --- .../site_config/standard/512pixels.net.txt | 4 +- inc/3rdparty/site_config/standard/README.md | 8 +-- .../site_config/standard/alexduner.com.txt | 2 +- .../site_config/standard/anandtech.com.txt | 6 ++- .../site_config/standard/apotheke-adhoc.de.txt | 23 +++++++++ .../site_config/standard/arstechnica.com.txt | 2 + .../site_config/standard/autocar.co.uk.txt | 13 +++++ inc/3rdparty/site_config/standard/bbc.co.uk.txt | 17 +++++- inc/3rdparty/site_config/standard/bbc.com.txt | 60 ++++++++++++++++++++++ inc/3rdparty/site_config/standard/bit-tech.net.txt | 19 +++++++ .../site_config/standard/bleacherreport.com.txt | 16 ++++++ .../site_config/standard/blogs.faz.net.txt | 45 ++++++++++++++++ .../site_config/standard/brasil.elpais.com.txt | 7 ++- .../site_config/standard/businessweek.com.txt | 41 +++++---------- inc/3rdparty/site_config/standard/buzzfeed.com.txt | 11 +++- .../site_config/standard/canonrumors.com.txt | 28 ++++++++++ inc/3rdparty/site_config/standard/chomsky.info.txt | 3 +- .../site_config/standard/cn.reuters.com.txt | 6 ++- .../site_config/standard/code.fivefilters.org.txt | 4 +- .../site_config/standard/csmonitor.com.txt | 2 +- .../site_config/standard/da.feedsportal.com.txt | 2 +- .../site_config/standard/designsponge.com.txt | 31 +++++++++++ .../site_config/standard/desitvforum.net.txt | 4 +- .../standard/deutsche-apotheker-zeitung.de.txt | 29 +++++++++++ 
.../standard/dictionary.reference.com.txt | 8 ++- inc/3rdparty/site_config/standard/dropbox.com.txt | 4 +- .../site_config/standard/echo-online.de.txt | 24 +++++++++ .../site_config/standard/economist.com.txt | 7 ++- .../site_config/standard/eurogamer.net.txt | 13 ++--- inc/3rdparty/site_config/standard/facebook.com.txt | 9 +++- inc/3rdparty/site_config/standard/faz.net.txt | 0 .../site_config/standard/finance.yahoo.com.txt | 4 +- .../site_config/standard/fivechapters.com.txt | 2 +- .../site_config/standard/fivefilters.org.txt | 5 +- .../site_config/standard/foreignpolicy.com.txt | 8 ++- inc/3rdparty/site_config/standard/golem.de.txt | 53 +++++++++++-------- inc/3rdparty/site_config/standard/heise.de.txt | 45 +++++++++++++--- .../site_config/standard/hosted.ap.org.txt | 2 +- .../site_config/standard/itunes.apple.com.txt | 14 +++++ .../site_config/standard/kachiblog.com.txt | 2 +- .../site_config/standard/lifehacker.co.uk.txt | 7 +++ inc/3rdparty/site_config/standard/mainpost.de.txt | 2 +- .../site_config/standard/medialens.org.txt | 3 +- inc/3rdparty/site_config/standard/medium.com.txt | 13 +++-- .../site_config/standard/menshealth.com.sg.txt | 12 +++++ .../site_config/standard/northumberlandview.ca.txt | 2 +- inc/3rdparty/site_config/standard/nytimes.com.txt | 6 ++- inc/3rdparty/site_config/standard/real.gr.txt | 6 ++- inc/3rdparty/site_config/standard/reddit.com.txt | 5 +- .../site_config/standard/searchengineland.com.txt | 2 +- .../site_config/standard/sourcebooks.com.txt | 2 +- .../site_config/standard/tabletmag.com.txt | 5 ++ .../site_config/standard/tagesspiegel.de.txt | 60 ++++++++++++++++++++++ inc/3rdparty/site_config/standard/techmeme.com.txt | 2 +- .../site_config/standard/theatlantic.com.txt | 2 + .../site_config/standard/theglobeandmail.com.txt | 7 ++- .../site_config/standard/theguardian.com.txt | 13 ++++- inc/3rdparty/site_config/standard/theverge.com.txt | 7 ++- .../site_config/standard/thisiscolossal.com.txt | 25 +++++++++ 
.../site_config/standard/towerofthehand.com.txt | 10 ++++ inc/3rdparty/site_config/standard/twitter.com.txt | 3 +- .../site_config/standard/vanityfair.com.txt | 5 +- inc/3rdparty/site_config/standard/wn.de.txt | 18 +++++++ inc/3rdparty/site_config/standard/zeit.de.txt | 4 -- 64 files changed, 685 insertions(+), 119 deletions(-) create mode 100755 inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt create mode 100755 inc/3rdparty/site_config/standard/autocar.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/bbc.com.txt create mode 100755 inc/3rdparty/site_config/standard/bit-tech.net.txt create mode 100755 inc/3rdparty/site_config/standard/bleacherreport.com.txt create mode 100755 inc/3rdparty/site_config/standard/blogs.faz.net.txt create mode 100755 inc/3rdparty/site_config/standard/canonrumors.com.txt create mode 100755 inc/3rdparty/site_config/standard/designsponge.com.txt create mode 100755 inc/3rdparty/site_config/standard/deutsche-apotheker-zeitung.de.txt create mode 100755 inc/3rdparty/site_config/standard/echo-online.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/faz.net.txt create mode 100755 inc/3rdparty/site_config/standard/itunes.apple.com.txt create mode 100755 inc/3rdparty/site_config/standard/lifehacker.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/menshealth.com.sg.txt create mode 100755 inc/3rdparty/site_config/standard/tabletmag.com.txt create mode 100755 inc/3rdparty/site_config/standard/tagesspiegel.de.txt create mode 100755 inc/3rdparty/site_config/standard/thisiscolossal.com.txt create mode 100755 inc/3rdparty/site_config/standard/towerofthehand.com.txt create mode 100755 inc/3rdparty/site_config/standard/wn.de.txt diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt index e458980f..02a996f7 100755 --- a/inc/3rdparty/site_config/standard/512pixels.net.txt +++ b/inc/3rdparty/site_config/standard/512pixels.net.txt @@ -1,2 
+1,2 @@ -title: substring-before(//title, '—') -test_url: http://512pixels.net/more-on-linked-lists/ \ No newline at end of file +title: //meta[@property='og:title']/@content +test_url: http://www.512pixels.net/blog/2014/10/the-move diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md index 9040ba85..ab5b12d9 100755 --- a/inc/3rdparty/site_config/standard/README.md +++ b/inc/3rdparty/site_config/standard/README.md @@ -1,12 +1,14 @@ Full-Text RSS site config files ================ -[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically. +[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If no rules are found, it tries to detect the content block automatically. -This repository contains the site config files we use in Full-Text RSS. +This repository contains the site-specific extraction rules we rely on in Full-Text RSS. ### Contributing changes +We run automated tests on these files to detect issues. If you'd like to help keep these up to date, please look at the [test results](http://siteconfig.fivefilters.org/test/) and see which files you'd like to contribute fixes for. + We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. 
This is what GitHub calls the Fork and Pull model: @@ -31,7 +33,7 @@ Marco, Instapaper's creator, graciously opened up the database of contributions > And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached. -Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required). +Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (no longer available since Instapaper was sold). ### Testing site config files diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt index bd9de9d7..3897f9ec 100755 --- a/inc/3rdparty/site_config/standard/alexduner.com.txt +++ b/inc/3rdparty/site_config/standard/alexduner.com.txt @@ -1,4 +1,4 @@ body: //section[@class='content'] date: //span[1] author: //h1[@id='sitetitle'] -test_url: https://alexduner.com/blog/2013/1/something-i-learned-today \ No newline at end of file +test_url: http://alexduner.com/blog/something-i-learned-today diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt index 7d804918..fc95c5d8 100755 --- a/inc/3rdparty/site_config/standard/anandtech.com.txt +++ b/inc/3rdparty/site_config/standard/anandtech.com.txt @@ -1,3 +1,5 @@ +body: //section[@class='main_cont']/img | //div[@class='articleContent'] +title: //div[@class='blog_top_left']//h2 author: //a[@class='b'][1] date: substring-after(substring-before(//div, 'Posted in'), ' on ') strip_image_src: /content/images/globals/ @@ -8,4 +10,6 @@ prune: no single_page_link: concat('http://www.anandtech.com/print/', 
substring-after(//meta[@property='og:url']/@content, '/show/')) -test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file +test_url: http://www.anandtech.com/show/8370/gigabyte-am1m-s2h-review +test_url: http://www.anandtech.com/show/8402/sandisk-releases-ultra-ii-ssd-the-second-tlc-nand-ssd-in-the-market +test_url: http://www.anandtech.com/show/8400/arms-cortex-m-even-smaller-and-lower-power-cpu-cores diff --git a/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt b/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt new file mode 100755 index 00000000..3a702e7b --- /dev/null +++ b/inc/3rdparty/site_config/standard/apotheke-adhoc.de.txt @@ -0,0 +1,23 @@ +# Author: zinnober + +prune: no + +title: substring-before(//div[@id='content']/h1, ',') + +single_page_link: //a[@title='Seite drucken'] + +body: //div[@id='detail-body'] + +replace_string(): +replace_string(

):

+ +# Fix headlines +replace_string(Patrick Hollstein):   +replace_string(APOTHEKE ADHOC):   +replace_string(dpa):   +replace_string(Katharina Lübke):   +replace_string(Julia Pradel):   +replace_string(Franziska Gerhardt):   + +test_url: http://www.apotheke-adhoc.de/nachrichten/politik/nachricht-detail-politik/deutscher-apothekertag-antraege-gegen-lieferengpaesse-2/ + diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt index 767f6800..eb92aa2c 100755 --- a/inc/3rdparty/site_config/standard/arstechnica.com.txt +++ b/inc/3rdparty/site_config/standard/arstechnica.com.txt @@ -13,5 +13,7 @@ title: //div[@id='story']//h2[@class='title'] strip: //div[@class='pager'] next_page_link: //nav//a[span/@class='next']/@href +native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] + test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ diff --git a/inc/3rdparty/site_config/standard/autocar.co.uk.txt b/inc/3rdparty/site_config/standard/autocar.co.uk.txt new file mode 100755 index 00000000..9f4fe18b --- /dev/null +++ b/inc/3rdparty/site_config/standard/autocar.co.uk.txt @@ -0,0 +1,13 @@ +title: //div[@class='col-center']/h1 +author: //div[@class='personality']/a +date: //div[@class='personality-date'] +body: //div[@class='content-top ']//div[@class='content'][1] | //div[contains(@class,'article-body')] | //div[contains(@class,'main-article')] + +next_page_link: //div[@id='review-link']/a + +strip: //div[@class='author-block'] +strip: //p//iframe[contains(@src,'signup')]/preceding::p[1] + +test_url: http://www.autocar.co.uk/car-review/volkswagen/golf +test_url: http://www.autocar.co.uk/car-news/pebble-beach/saleen-unveils-performance-electric-vehicle-based-tesla-model-s +test_url: 
http://www.autocar.co.uk/car-review/rolls-royce/first-drives/rolls-royce-ghost-series-ii-first-drive-review diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt index ef1f491a..bad77654 100755 --- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt +++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt @@ -13,7 +13,7 @@ body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] #strip: //div[@class="story-feature narrow"] #strip: //div[@class="story-feature wide"] #strip: //div[@class="story-feature dslideshow-enclosure"] -strip: //div[contains(@class, "story-feature")] +strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))] strip: //span[@class="story-date"] #strip: //div[@class="caption body-narrow-width"] strip: //div[@class="warning"]//p @@ -30,13 +30,26 @@ strip: //div[contains(@class, 'comment-introduction')] strip: //div[contains(@class, 'share-tools')] strip: //div[@id='also-related-links'] +strip_id_or_class: share-help +strip_id_or_class: comments_module + replace_string(

replace_string():
+tidy: no prune: no dissolve: //h2 + test_url: http://www.bbc.co.uk/sport/0/football/23224017 +test_contains: Swansea City have completed the club-record signing + test_url: http://www.bbc.co.uk/news/business-15060862 +test_contains: Europe's leaders are meeting again to try to solve + +# news feed +test_url: http://feeds.bbci.co.uk/news/rss.xml +# sports feed +test_url: http://feeds.bbci.co.uk/sport/0/football/rss.xml?edition=int # video entry -test_url: http://www.bbc.co.uk/news/world-asia-22056933 \ No newline at end of file +test_url: http://www.bbc.co.uk/news/world-asia-22056933 diff --git a/inc/3rdparty/site_config/standard/bbc.com.txt b/inc/3rdparty/site_config/standard/bbc.com.txt new file mode 100755 index 00000000..c04a683e --- /dev/null +++ b/inc/3rdparty/site_config/standard/bbc.com.txt @@ -0,0 +1,60 @@ +body: //div[@class="story-body"] +# for video entries +body: //div[contains(@class, "videoInStory") or @id="meta-information"] +title: //h1[@class="story-header"] +date: //span[@class="story-date"]/span[@class='date'] +# for sport site +date: //meta[@name='DCTERMS.created']/@content +author: //div[@id='headline']//span[@class='byline-name'] + +# recipes, e.g. 
http://www.bbc.co.uk/food/recipes/mymincepies_71055 +body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] + +#strip: //div[@class="story-feature narrow"] +#strip: //div[@class="story-feature wide"] +#strip: //div[@class="story-feature dslideshow-enclosure"] +strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))] +strip: //span[@class="story-date"] +#strip: //div[@class="caption body-narrow-width"] +strip: //div[@class="warning"]//p +strip: //div[@id='page-bookmark-links-head'] +strip: //object +strip: //div[contains(@class, "bbccom_advert_placeholder")] +strip: //div[contains(@class, "embedded-hyper")] +strip: //div[contains(@class, 'market-data')] +strip: //a[contains(@class, 'hidden')] +strip: //div[contains(@class, 'hypertabs')] +strip: //div[contains(@class, 'related')] +strip: //form[@id='comment-form'] +strip: //div[contains(@class, 'comment-introduction')] +strip: //div[contains(@class, 'share-tools')] +strip: //div[@id='also-related-links'] + +strip_id_or_class: share-help +strip_id_or_class: comments_module + +replace_string(