]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/config.php
[change] we now use Full-Text RSS 3.1, thank you so much @fivefilters
[github/wallabag/wallabag.git] / inc / 3rdparty / config.php
1 <?php
2 /* Full-Text RSS config */
3
4 // ......IMPORTANT......................................
5 // .....................................................
6 // Please do not change this file (config.php) directly.
7 // Save a copy as custom_config.php and make your
8 // changes to that instead. It will automatically
9 // override anything in config.php. Because config.php
10 // always gets loaded anyway, you can simply specify
11 // options you'd like to override in custom_config.php.
12 // .....................................................
13
14 // Create config object
15 if (!isset($options)) $options = new stdClass();
16
17 // Enable service
18 // ----------------------
19 // Set this to false if you want to disable the service.
20 // If set to false, no feed is produced and users will
21 // be told that the service is disabled.
22 $options->enabled = true;
23
24 // Debug mode
25 // ----------------------
26 // Enable or disable debugging. When enabled debugging works by passing
27 // &debug to the makefulltextfeed.php querystring.
28 // Valid values:
29 // true or 'user' (default) - let user decide
30 // 'admin' - debug works only for logged in admin users
31 // false - disabled
32 $options->debug = true;
33
34 // Default entries (without access key)
35 // ----------------------
36 // The number of feed items to process when no API key is supplied
37 // and no &max=x value is supplied in the querystring.
38 $options->default_entries = 5;
39
40 // Max entries (without access key)
41 // ----------------------
42 // The maximum number of feed items to process when no access key is supplied.
43 // This limits the user-supplied &max=x value. For example, if the user
44 // asks for 20 items to be processed (&max=20), if max_entries is set to
45 // 10, only 10 will be processed.
46 $options->max_entries = 10;
47
48 // Rewrite relative URLs
49 // ----------------------
50 // With this enabled relative URLs found in the extracted content
51 // block are automatically rewritten as absolute URLs.
52 $options->rewrite_relative_urls = true;
53
54 // Exclude items if extraction fails
55 // ---------------------------------
56 // Excludes items from the resulting feed
57 // if we cannot extract any content from the
58 // item URL.
59 // Possible values...
60 // Enable: true
61 // Disable: false (default)
62 // User decides: 'user' (this option will appear on the form)
63 $options->exclude_items_on_fail = 'user';
64
65 // Enable multi-page support
66 // -------------------------
67 // If enabled, we will try to follow next page links on multi-page articles.
68 // Currently this only happens for sites where next_page_link has been defined
69 // in a site config file.
70 $options->multipage = true;
71
72 // Enable caching
73 // ----------------------
74 // Enable this if you'd like to cache results
75 // for 10 minutes. Cache files are written to disk (in cache/ subfolders
76 // - which must be writable).
77 // Initially it's best to keep this disabled to make sure everything works
78 // as expected. If you have APC enabled, please also see smart_cache in the
79 // advanced section.
80 $options->caching = false;
81
82 // Cache directory
83 // ----------------------
84 // Only used if caching is true
85 $options->cache_dir = dirname(__FILE__).'/cache';
86
87 // Message to prepend (without access key)
88 // ----------------------
89 // HTML to insert at the beginning of each feed item when no access key is supplied.
90 // Substitution tags:
91 // {url} - Feed item URL
92 // {effective-url} - Feed item URL after we've followed all redirects
93 $options->message_to_prepend = '';
94
95 // Message to append (without access key)
96 // ----------------------
97 // HTML to insert at the end of each feed item when no access key is supplied.
98 // Substitution tags:
99 // {url} - Feed item URL
100 // {effective-url} - Feed item URL after we've followed all redirects
101 $options->message_to_append = '';
102
103 // Error message when content extraction fails (without access key)
104 // ----------------------
105 $options->error_message = '[unable to retrieve full-text content]';
106
107 // Keep enclosure in feed items
108 // If enabled, we will try to preserve enclosures if present.
109 // ----------------------
110 $options->keep_enclosures = true;
111
112 // Detect language
113 // ---------------
114 // Should we try and find/guess the language of the article being processed?
115 // Values will be placed inside the <dc:language> element inside each <item> element
116 // Possible values:
117 // * Ignore language: 0
118 // * Use article/feed metadata (e.g. HTML lang attribute): 1 (default)
119 // * As above, but guess if not present: 2
120 // * Always guess: 3
121 // * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2)
122 $options->detect_language = 1;
123
124 // Registration key
125 // ---------------
126 // The registration key is optional. It is not required to use Full-Text RSS,
127 // and does not affect the normal operation of Full-Text RSS. It is currently
128 // only used on admin pages which help you update site patterns with the
129 // latest version offered by FiveFilters.org. For these admin-related
130 // tasks to complete, we will require a valid registration key.
131 // If you would like one, you can purchase the latest version of Full-Text RSS
132 // at http://fivefilters.org/content-only/
133 // Your registration key will automatically be sent in the confirmation email.
134 // Once you have it, simply copy and paste it here.
135 $options->registration_key = '';
136
137 /////////////////////////////////////////////////
138 /// RESTRICT ACCESS /////////////////////////////
139 /////////////////////////////////////////////////
140
141 // Admin credentials
142 // ----------------------
143 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
144 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
145 // If no password or username is set, pages requiring admin privileges will be inaccessible.
146 // The default username is 'admin'.
147 // If overriding with an environment variable, separate username and password with a colon, e.g.:
148 // ftr_admin_credentials: admin:my-secret-password
149 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
150 $options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
151
152 // URLs to allow
153 // ----------------------
154 // List of URLs (or parts of a URL) which the service will accept.
155 // If the list is empty, all URLs (except those specified in the blocked list below)
156 // will be permitted.
157 // Empty: array();
158 // Non-empty example: array('example.com', 'anothersite.org');
159 $options->allowed_urls = array();
160
161 // URLs to block
162 // ----------------------
163 // List of URLs (or parts of a URL) which the service will not accept.
164 // Note: this list is ignored if allowed_urls is not empty
165 $options->blocked_urls = array();
166
167 // Key holder(s) only?
168 // ----------------------
169 // Set this to true if you want to restrict access only to
170 // those with a key (see below to specify key(s)).
171 // If set to true, no feed is produced unless a valid
172 // key is provided.
173 $options->key_required = false;
174
175 // Favour item titles in feed
176 // ----------------------
177 // By default, when processing feeds, we assume item titles in the feed
178 // have not been truncated. So after processing web pages, the extracted titles
179 // are not used in the generated feed. If you prefer to have extracted titles in
180 // the feed you can either set this to false, in which case we will always favour
181 // extracted titles. Alternatively, if set to 'user' (default) we'll use the
182 // extracted title if you pass '&use_extracted_title' in the querystring.
183 // Possible values:
184 // * Favour feed titles: true
185 // * Favour extracted titles: false
186 // * Favour feed titles with user override: 'user' (default)
187 // Note: this has no effect when the input URL is to a web page - in these cases
188 // we always use the extracted title in the generated feed.
189 $options->favour_feed_titles = 'user';
190
191 // Access keys (password protected access)
192 // ------------------------------------
193 // NOTE: You do not need an API key from fivefilters.org to run your own
194 // copy of the code. This is here if you'd like to restrict access to
195 // _your_ copy.
196 // Keys let you group users - those with a key and those without - and
197 // restrict access to the service for those without a key.
198 // If you want everyone to access the service in the same way, you can
199 // leave the array below empty and ignore the access key options further down.
200 // The options further down let you control how the service should behave
201 // in each mode.
202 // Note: Explicitly including the index number (1 and 2 in the examples below)
203 // is highly recommended (when generating feeds, we encode the key and
204 // refer to it by index number and hash).
205 $options->api_keys = array();
206 // Example:
207 // $options->api_keys[1] = 'secret-key-1';
208 // $options->api_keys[2] = 'secret-key-2';
209
210 // Default entries (with access key)
211 // ----------------------
212 // The number of feed items to process when a valid access key is supplied.
213 $options->default_entries_with_key = 5;
214
215 // Max entries (with access key)
216 // ----------------------
217 // The maximum number of feed items to process when a valid access key is supplied.
218 $options->max_entries_with_key = 10;
219
220 /////////////////////////////////////////////////
221 /// ADVANCED OPTIONS ////////////////////////////
222 /////////////////////////////////////////////////
223
224 // Enable XSS filter?
225 // ----------------------
226 // We have not enabled this by default because we assume the majority of
227 // our users do not display the HTML retrieved by Full-Text RSS
228 // in a web page without further processing. If you subscribe to our generated
229 // feeds in your news reader application, it should, if it's good software, already
230 // filter the resulting HTML for XSS attacks, making it redundant for
231 // Full-Text RSS to do the same. Similarly with frameworks/CMS which display
232 // feed content - the content should be treated like any other user-submitted content.
233 //
234 // If you are writing an application yourself which is processing feeds generated by
235 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
236 // or enable this option. This might be useful if you are processing our generated
237 // feeds with JavaScript on the client side - although there's client side xss
238 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
239 //
240 // If enabled, we'll pass retrieved HTML content through htmLawed with
241 // safe flag on and style attributes denied, see
242 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
243 // Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.
244 //
245 // Valid values:
246 // true - enabled, all content will be filtered
247 // 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable
248 // false - disabled
249 $options->xss_filter = 'user';
250
251 // Allowed parsers
252 // ----------------------
253 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
254 // While fast, on some sites it may not always produce good results.
255 // For these sites, you can specify an alternative HTML parser:
256 // parser: html5lib
257 // The html5lib parser is bundled with Full-Text RSS.
258 // see http://code.google.com/p/html5lib/
259 //
260 // To disable HTML parsing with html5lib, you can remove it from this list.
261 // By default we allow both: libxml and html5lib.
262 $options->allowed_parsers = array('libxml', 'html5lib');
263 //$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases
264
265 // Enable Cross-Origin Resource Sharing (CORS)
266 // ----------------------
267 // If enabled we'll send the following HTTP header
268 // Access-Control-Allow-Origin: *
269 // see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing
270 $options->cors = false;
271
272 // Use APC user cache?
273 // ----------------------
274 // If enabled we will store site config files (when requested
275 // for the first time) in APC's user cache. Keys prefixed with 'sc.'
276 // This improves performance by reducing disk access.
277 // Note: this has no effect if APC is unavailable on your server.
278 $options->apc = true;
279
280 // Smart cache (experimental)
281 // ----------------------
282 // With this option enabled we will not cache to disk immediately.
283 // We will store the cache key in APC and if it's requested again
284 // we will cache results to disk. Keys prefixed with 'cache.'
285 // This improves performance by reducing disk access.
286 // Note: this has no effect if APC is disabled or unavailable on your server,
287 // or if you have caching disabled.
288 $options->smart_cache = true;
289
290 // Fingerprints
291 // ----------------------
292 // key is fingerprint (fragment to find in HTML)
293 // value is host name to use for site config lookup if fingerprint matches
294 $options->fingerprints = array(
295 // Posterous
296 '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
297 // Blogger
298 '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
299 '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
300 // WordPress (hosted)
301 // '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true),
302 // WordPress (self-hosted and hosted)
303 '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
304 );
305
306 // User Agent strings - mapping domain names
307 // ----------------------
308 // e.g. $options->user_agents = array('example.org' => 'PHP/5.2');
309 $options->user_agents = array( 'lifehacker.com' => 'PHP/5.2',
310 'gawker.com' => 'PHP/5.2',
311 'deadspin.com' => 'PHP/5.2',
312 'kotaku.com' => 'PHP/5.2',
313 'jezebel.com' => 'PHP/5.2',
314 'io9.com' => 'PHP/5.2',
315 'jalopnik.com' => 'PHP/5.2',
316 'gizmodo.com' => 'PHP/5.2',
317 '.wikipedia.org' => 'Mozilla/5.2',
318 '.fok.nl' => 'Googlebot/2.1',
319 'getpocket.com' => 'PHP/5.2'
320 );
321
322 // URL Rewriting
323 // ----------------------
324 // Currently allows simple string replace of URLs.
325 // Useful for rewriting certain URLs to point to a single page
326 // or HTML view. Although using the single_page_link site config
327 // instruction is the preferred way to do this, sometimes, as
328 // with Google Docs URLs, it's not possible.
329 // Note: this might move to the site config file at some point.
330 $options->rewrite_url = array(
331 // Rewrite public Google Docs URLs to point to HTML view:
332 // if a URL contains docs.google.com, replace /Doc? with /View?
333 'docs.google.com' => array('/Doc?' => '/View?'),
334 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
335 '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'),
336 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com')
337 );
338
339 // Content-Type exceptions
340 // -----------------------
341 // Here you can define different actions based
342 // on the Content-Type header returned by server.
343 // MIME type as key, action as value.
344 // Valid actions:
345 // * 'exclude' - exclude this item from the result
346 // * 'link' - create HTML link to the item
347 $options->content_type_exc = array(
348 'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
349 'image' => array('action'=>'link', 'name'=>'Image'),
350 'audio' => array('action'=>'link', 'name'=>'Audio'),
351 'video' => array('action'=>'link', 'name'=>'Video')
352 );
353
354 // Cache directory level
355 // ----------------------
356 // Spread cache files over different directories (only used if caching is enabled).
357 // Used to prevent large number of files in one directory.
358 // This corresponds to Zend_Cache's hashed_directory_level
359 // see http://framework.zend.com/manual/en/zend.cache.backends.html
360 // It's best not to change this if you're unsure.
361 $options->cache_directory_level = 0;
362
363 // Cache cleanup
364 // -------------
365 // 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup)
366 // 1 = clean cache every time the script runs (not recommended)
367 // 100 = clean cache roughly once every 100 script runs
368 // x = clean cache roughly once every x script runs
369 // ...you get the idea :)
370 $options->cache_cleanup = 100;
371
372 /////////////////////////////////////////////////
373 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
374 /////////////////////////////////////////////////
375
376 if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
377
// Apply overrides, but only when this file really is config.php. A copy saved
// under another name (e.g. custom_config.php) carries the same code and must
// not re-run this section.
if (basename(__FILE__) === 'config.php') {
	// Optional local overrides: custom_config.php may reassign any $options
	// property set above.
	if (file_exists(dirname(__FILE__).'/custom_config.php')) {
		require_once dirname(__FILE__).'/custom_config.php';
	}

	// check for environment variables - often used on cloud platforms
	// environment variables should be prefixed with 'ftr_', e.g.
	// ftr_max_entries: 1
	// will set the max_entries value to 1.
	// $_val is bound by reference so assignments write back into $options.
	foreach ($options as $_key=>&$_val) {
		$_key = "ftr_$_key";
		if (($_env = getenv($_key)) !== false) {
			if (is_array($_val)) {
				// Array-valued options cannot be expressed in a single
				// environment variable; only admin_credentials is supported,
				// using the "username:password" form.
				if ($_key === 'ftr_admin_credentials') {
					// Split on the first colon only, so the password itself
					// may contain colons.
					$_parts = array_map('trim', explode(':', $_env, 2));
					if (count($_parts) === 2) {
						$_val = array('username'=>$_parts[0], 'password'=>$_parts[1]);
					} else {
						// No colon present. Do not hand mismatched arrays to
						// array_combine() (ValueError on PHP 8, warning+false
						// before); fall back to 'admin' with an empty password.
						$_val = array('username'=>'admin', 'password'=>'');
					}
				}
			} elseif ($_env === 'true' || $_env === 'false') {
				// Literal 'true'/'false' strings become real booleans.
				$_val = ($_env === 'true');
			} elseif (is_numeric($_env)) {
				// Numeric strings are stored as integers (any fractional part is dropped).
				$_val = (int)$_env;
			} else { // string
				$_val = $_env;
			}
		}
	}
	// Drop the temporaries; unsetting $_val also breaks the reference into
	// $options left behind by the by-reference foreach.
	unset($_key, $_val, $_env, $_parts);
}