]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/config.php
update to 3.2 version of full-text-rss, issue #694
[github/wallabag/wallabag.git] / inc / 3rdparty / config.php
1 <?php
2 /* Full-Text RSS config */
3
4 // ......IMPORTANT......................................
5 // .....................................................
6 // Please do not change this file (config.php) directly.
7 // Save a copy as custom_config.php and make your
8 // changes to that instead. It will automatically
9 // override anything in config.php. Because config.php
10 // always gets loaded anyway, you can simply specify
11 // options you'd like to override in custom_config.php.
12 // .....................................................
13
// Bind $options to the global scope so the settings below land on the
// shared config object even when this file is included from inside a function.
global $options;

// Create the config object only if one has not been set up already.
if (!isset($options)) {
    $options = new stdClass();
}
18
19 // Enable service
20 // ----------------------
21 // Set this to false if you want to disable the service.
22 // If set to false, no feed is produced and users will
23 // be told that the service is disabled.
24 $options->enabled = true;
25
26 // Debug mode
27 // ----------------------
28 // Enable or disable debugging. When enabled debugging works by passing
29 // &debug to the makefulltextfeed.php querystring.
30 // Valid values:
31 // true or 'user' (default) - let user decide
32 // 'admin' - debug works only for logged in admin users
33 // false - disabled
34 $options->debug = true;
35
36 // Default entries (without access key)
37 // ----------------------
38 // The number of feed items to process when no API key is supplied
39 // and no &max=x value is supplied in the querystring.
40 $options->default_entries = 5;
41
42 // Max entries (without access key)
43 // ----------------------
44 // The maximum number of feed items to process when no access key is supplied.
45 // This limits the user-supplied &max=x value. For example, if the user
46 // asks for 20 items to be processed (&max=20), if max_entries is set to
47 // 10, only 10 will be processed.
48 $options->max_entries = 10;
49
50 // Full content
51 // ----------------------
52 // By default Full-Text RSS includes the extracted content in the output.
53 // You can exclude this from the output by passing '&content=0' in the querystring.
54 //
55 // Possible values...
56 // Always include: true
57 // Never include: false
58 // Include unless user overrides (&content=0): 'user' (default)
59 //
60 // Note: currently this does not disable full content extraction. It simply omits it
61 // from the output.
62 $options->content = 'user';
63
64 // Excerpts
65 // ----------------------
66 // By default Full-Text RSS does not include excerpts in the output.
67 // You can enable this by passing '&summary=1' in the querystring.
68 // This will include a plain text excerpt from the extracted content.
69 //
70 // Possible values...
71 // Always include: true (recommended for new users)
72 // Never include: false
73 // Don't include unless user overrides (&summary=1): 'user' (default)
74 //
75 // Important: if both content and excerpts are requested, the excerpt will be
76 // placed in the description element and the full content inside content:encoded.
77 // If excerpts are not requested, the full content will go inside the description element.
78 //
79 // Why are we not returning both excerpts and content by default?
80 // Mainly for backward compatibility.
81 // Excerpts should appear in the feed item's description element. Previous versions
82 // of Full-Text RSS did not return excerpts, so the description element was always
83 // used for the full content (as recommended by the RSS advisory). When returning both,
84 // we need somewhere else to place the content (content:encoded).
85 // Having both enabled should not create any problems for news readers, but it may create
86 // problems for developers upgrading from one of our earlier versions who may now find
87 // their applications are returning excerpts instead of the full content they were
88 // expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
89 // excerpts must be explicitly requested in the querystring by default.
90 //
91 // Why not use a different element name for excerpts?
92 // According to the RSS advisory:
93 // "Publishers who employ summaries should store the summary in description and
94 // the full content in content:encoded, ordering description first within the item.
95 // On items with no summary, the full content should be stored in description."
96 // See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
97 //
98 // For more consistent element naming, we recommend new users set this option to true.
99 // The full content can still be excluded via the querystring, but the element names
100 // will not change: when $options->summary = true, the description element will always
101 // be reserved for the excerpt and content:encoded always for full content.
102 $options->summary = 'user';
103
104 // Rewrite relative URLs
105 // ----------------------
106 // With this enabled relative URLs found in the extracted content
107 // block are automatically rewritten as absolute URLs.
108 $options->rewrite_relative_urls = true;
109
110 // Exclude items if extraction fails
111 // ---------------------------------
112 // Excludes items from the resulting feed
113 // if we cannot extract any content from the
114 // item URL.
115 // Possible values...
116 // Enable: true
117 // Disable: false (default)
118 // User decides: 'user' (this option will appear on the form)
119 $options->exclude_items_on_fail = 'user';
120
121 // Enable multi-page support
122 // -------------------------
123 // If enabled, we will try to follow next page links on multi-page articles.
124 // Currently this only happens for sites where next_page_link has been defined
125 // in a site config file.
126 $options->multipage = true;
127
128 // Enable caching
129 // ----------------------
130 // Enable this if you'd like to cache results
131 // for 10 minutes. Cache files are written to disk (in cache/ subfolders
132 // - which must be writable).
133 // Initially it's best to keep this disabled to make sure everything works
134 // as expected. If you have APC enabled, please also see smart_cache in the
135 // advanced section.
136 $options->caching = false;
137
138 // Cache directory
139 // ----------------------
140 // Only used if caching is true
141 $options->cache_dir = dirname(__FILE__).'/cache';
142
143 // Message to prepend (without access key)
144 // ----------------------
145 // HTML to insert at the beginning of each feed item when no access key is supplied.
146 // Substitution tags:
147 // {url} - Feed item URL
148 // {effective-url} - Feed item URL after we've followed all redirects
149 $options->message_to_prepend = '';
150
151 // Message to append (without access key)
152 // ----------------------
153 // HTML to insert at the end of each feed item when no access key is supplied.
154 // Substitution tags:
155 // {url} - Feed item URL
156 // {effective-url} - Feed item URL after we've followed all redirects
157 $options->message_to_append = '';
158
159 // Error message when content extraction fails (without access key)
160 // ----------------------
161 $options->error_message = '[unable to retrieve full-text content]';
162
163 // Keep enclosure in feed items
164 // If enabled, we will try to preserve enclosures if present.
165 // ----------------------
166 $options->keep_enclosures = true;
167
168 // Detect language
169 // ---------------
170 // Should we try and find/guess the language of the article being processed?
171 // Values will be placed inside the <dc:language> element inside each <item> element
172 // Possible values:
173 // * Ignore language: 0
174 // * Use article/feed metadata (e.g. HTML lang attribute): 1 (default)
175 // * As above, but guess if not present: 2
176 // * Always guess: 3
177 // * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2)
178 $options->detect_language = 1;
179
180 // Registration key
181 // ---------------
182 // The registration key is optional. It is not required to use Full-Text RSS,
183 // and does not affect the normal operation of Full-Text RSS. It is currently
184 // only used on admin pages which help you update site patterns with the
185 // latest version offered by FiveFilters.org. For these admin-related
186 // tasks to complete, we will require a valid registration key.
187 // If you would like one, you can purchase the latest version of Full-Text RSS
188 // at http://fivefilters.org/content-only/
189 // Your registration key will automatically be sent in the confirmation email.
190 // Once you have it, simply copy and paste it here.
191 $options->registration_key = '';
192
193 /////////////////////////////////////////////////
194 /// RESTRICT ACCESS /////////////////////////////
195 /////////////////////////////////////////////////
196
197 // Admin credentials
198 // ----------------------
199 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
200 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
201 // If no password or username is set, pages requiring admin privileges will be inaccessible.
202 // The default username is 'admin'.
203 // If overriding with an environment variable, separate username and password with a colon, e.g.:
204 // ftr_admin_credentials: admin:my-secret-password
205 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
206 $options->admin_credentials = array('username'=>'admin', 'password'=>'');
207
208 // URLs to allow
209 // ----------------------
210 // List of URLs (or parts of a URL) which the service will accept.
211 // If the list is empty, all URLs (except those specified in the blocked list below)
212 // will be permitted.
213 // Empty: array();
214 // Non-empty example: array('example.com', 'anothersite.org');
215 $options->allowed_urls = array();
216
217 // URLs to block
218 // ----------------------
219 // List of URLs (or parts of a URL) which the service will not accept.
220 // Note: this list is ignored if allowed_urls is not empty
221 $options->blocked_urls = array();
222
223 // Key holder(s) only?
224 // ----------------------
225 // Set this to true if you want to restrict access only to
226 // those with a key (see below to specify key(s)).
227 // If set to true, no feed is produced unless a valid
228 // key is provided.
229 $options->key_required = false;
230
231 // Favour item titles in feed
232 // ----------------------
233 // By default, when processing feeds, we assume item titles in the feed
234 // have not been truncated. So after processing web pages, the extracted titles
235 // are not used in the generated feed. If you prefer to have extracted titles in
236 // the feed you can either set this to false, in which case we will always favour
237 // extracted titles. Alternatively, if set to 'user' (default) we'll use the
238 // extracted title if you pass '&use_extracted_title' in the querystring.
239 // Possible values:
240 // * Favour feed titles: true
241 // * Favour extracted titles: false
242 // * Favour feed titles with user override: 'user' (default)
243 // Note: this has no effect when the input URL is to a web page - in these cases
244 // we always use the extracted title in the generated feed.
245 $options->favour_feed_titles = 'user';
246
247 // Access keys (password protected access)
248 // ------------------------------------
249 // NOTE: You do not need an API key from fivefilters.org to run your own
250 // copy of the code. This is here if you'd like to restrict access to
251 // _your_ copy.
252 // Keys let you group users - those with a key and those without - and
253 // restrict what is available to those without a key.
254 // If you want everyone to access the service in the same way, you can
255 // leave the array below empty and ignore the access key options further down.
256 // The options further down let you control how the service should behave
257 // in each mode.
258 // Note: Explicitly including the index number (1 and 2 in the examples below)
259 // is highly recommended (when generating feeds, we encode the key and
260 // refer to it by index number and hash).
261 $options->api_keys = array();
262 // Example:
263 // $options->api_keys[1] = 'secret-key-1';
264 // $options->api_keys[2] = 'secret-key-2';
265
266 // Default entries (with access key)
267 // ----------------------
268 // The number of feed items to process when a valid access key is supplied.
269 $options->default_entries_with_key = 5;
270
271 // Max entries (with access key)
272 // ----------------------
273 // The maximum number of feed items to process when a valid access key is supplied.
274 $options->max_entries_with_key = 10;
275
276 /////////////////////////////////////////////////
277 /// ADVANCED OPTIONS ////////////////////////////
278 /////////////////////////////////////////////////
279
280 // Enable XSS filter?
281 // ----------------------
282 // We have not enabled this by default because we assume the majority of
283 // our users do not display the HTML retrieved by Full-Text RSS
284 // in a web page without further processing. If you subscribe to our generated
285 // feeds in your news reader application, it should, if it's good software, already
286 // filter the resulting HTML for XSS attacks, making it redundant for
287 // Full-Text RSS to do the same. Similarly with frameworks/CMS which display
288 // feed content - the content should be treated like any other user-submitted content.
289 //
290 // If you are writing an application yourself which is processing feeds generated by
291 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
292 // or enable this option. This might be useful if you are processing our generated
293 // feeds with JavaScript on the client side - although there's client side xss
294 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
295 //
296 // If enabled, we'll pass retrieved HTML content through htmLawed with
297 // safe flag on and style attributes denied, see
298 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
299 // Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.
300 //
301 // Valid values:
302 // true - enabled, all content will be filtered
303 // 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable
304 // false - disabled
305 $options->xss_filter = 'user';
306
307 // Allowed parsers
308 // ----------------------
309 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
310 // While fast, on some sites it may not always produce good results.
311 // For these sites, you can specify an alternative HTML parser:
312 // parser: html5lib
313 // The html5lib parser is bundled with Full-Text RSS.
314 // see http://code.google.com/p/html5lib/
315 //
316 // To disable HTML parsing with html5lib, you can remove it from this list.
317 // By default we allow both: libxml and html5lib.
318 $options->allowed_parsers = array('libxml', 'html5lib');
319 //$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases
320
321 // Enable Cross-Origin Resource Sharing (CORS)
322 // ----------------------
323 // If enabled we'll send the following HTTP header
324 // Access-Control-Allow-Origin: *
325 // see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing
326 $options->cors = false;
327
328 // Use APC user cache?
329 // ----------------------
330 // If enabled we will store site config files (when requested
331 // for the first time) in APC's user cache. Keys prefixed with 'sc.'
332 // This improves performance by reducing disk access.
333 // Note: this has no effect if APC is unavailable on your server.
334 $options->apc = true;
335
336 // Smart cache (experimental)
337 // ----------------------
338 // With this option enabled we will not cache to disk immediately.
339 // We will store the cache key in APC and if it's requested again
340 // we will cache results to disk. Keys prefixed with 'cache.'
341 // This improves performance by reducing disk access.
342 // Note: this has no effect if APC is disabled or unavailable on your server,
343 // or if you have caching disabled.
344 $options->smart_cache = true;
345
346 // Fingerprints
347 // ----------------------
348 // key is fingerprint (fragment to find in HTML)
349 // value is host name to use for site config lookup if fingerprint matches
// Map of HTML fragment => settings used when that fragment is found.
// Each entry names the host whose site config should be used for the match.
$options->fingerprints = array(
    // Posterous
    '<meta name="generator" content="Posterous"' => array(
        'hostname' => 'fingerprint.posterous.com',
        'head' => true,
    ),

    // Blogger (two generator tag variants seen in the wild)
    "<meta content='blogger' name='generator'" => array(
        'hostname' => 'fingerprint.blogspot.com',
        'head' => true,
    ),
    '<meta name="generator" content="Blogger"' => array(
        'hostname' => 'fingerprint.blogspot.com',
        'head' => true,
    ),

    // WordPress (hosted) - disabled; the broader entry below already matches it.
    // '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true),

    // WordPress (self-hosted and hosted): the unterminated fragment matches
    // any generator value beginning with "WordPress".
    '<meta name="generator" content="WordPress' => array(
        'hostname' => 'fingerprint.wordpress.com',
        'head' => true,
    ),
);
361
362 // User Agent strings - mapping domain names
363 // ----------------------
364 // e.g. $options->user_agents = array('example.org' => 'PHP/5.2');
// Domain name => User-Agent string to use for that domain.
$options->user_agents = array(
    'lifehacker.com' => 'PHP/5.2',
    'gawker.com'     => 'PHP/5.2',
    'deadspin.com'   => 'PHP/5.2',
    'kotaku.com'     => 'PHP/5.2',
    'jezebel.com'    => 'PHP/5.2',
    'io9.com'        => 'PHP/5.2',
    'jalopnik.com'   => 'PHP/5.2',
    'gizmodo.com'    => 'PHP/5.2',
    '.wikipedia.org' => 'Mozilla/5.2',
    '.fok.nl'        => 'Googlebot/2.1',
    'getpocket.com'  => 'PHP/5.2',
);
377
378 // URL Rewriting
379 // ----------------------
380 // Currently allows simple string replace of URLs.
381 // Useful for rewriting certain URLs to point to a single page
382 // or HTML view. Although using the single_page_link site config
383 // instruction is the preferred way to do this, sometimes, as
384 // with Google Docs URLs, it's not possible.
385 // Note: this might move to the site config file at some point.
// URL fragment to look for => array of (search string => replacement string).
$options->rewrite_url = array(
    // Rewrite public Google Docs URLs to point to the HTML view:
    // if a URL contains docs.google.com, replace /Doc? with /View?
    'docs.google.com' => array(
        '/Doc?' => '/View?',
    ),
    'tnr.com' => array(
        'tnr.com/article/' => 'tnr.com/print/article/',
    ),
    '.m.wikipedia.org' => array(
        '.m.wikipedia.org' => '.wikipedia.org',
    ),
    'm.vanityfair.com' => array(
        'm.vanityfair.com' => 'www.vanityfair.com',
    ),
);
394
395 // Content-Type exceptions
396 // -----------------------
397 // Here you can define different actions based
398 // on the Content-Type header returned by server.
399 // MIME type as key, action as value.
400 // Valid actions:
401 // * 'exclude' - exclude this item from the result
402 // * 'link' - create HTML link to the item
// MIME type (or type prefix) => how to treat items served with that Content-Type.
// 'action' is one of the valid actions ('exclude' or 'link'); 'name' is a short label.
$options->content_type_exc = array(
    'application/pdf' => array(
        'action' => 'link',
        'name' => 'PDF',
    ),
    'image' => array(
        'action' => 'link',
        'name' => 'Image',
    ),
    'audio' => array(
        'action' => 'link',
        'name' => 'Audio',
    ),
    'video' => array(
        'action' => 'link',
        'name' => 'Video',
    ),
);
409
410 // Cache directory level
411 // ----------------------
412 // Spread cache files over different directories (only used if caching is enabled).
413 // Used to prevent large number of files in one directory.
414 // This corresponds to Zend_Cache's hashed_directory_level
415 // see http://framework.zend.com/manual/en/zend.cache.backends.html
416 // It's best not to change this if you're unsure.
417 $options->cache_directory_level = 0;
418
419 // Cache cleanup
420 // -------------
421 // 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup)
422 // 1 = clean cache every time the script runs (not recommended)
423 // 100 = clean cache roughly once every 100 script runs
424 // x = clean cache roughly once every x script runs
425 // ...you get the idea :)
426 $options->cache_cleanup = 100;
427
428 /////////////////////////////////////////////////
429 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
430 /////////////////////////////////////////////////
431
// Version identifier. Defined only if not already defined, so an earlier
// definition is left untouched.
if (!defined('_FF_FTR_VERSION')) {
    define('_FF_FTR_VERSION', '3.2');
}

// The override steps below run only when this very file is named config.php.
// NOTE(review): presumably this keeps a copy saved as custom_config.php from
// re-running them - confirm.
if (basename(__FILE__) === 'config.php') {
    // Load user overrides, if present, from the same directory as this file.
    if (file_exists(dirname(__FILE__).'/custom_config.php')) {
        require_once dirname(__FILE__).'/custom_config.php';
    }

    // Check for environment variables - often used on cloud platforms.
    // Environment variables should be prefixed with 'ftr_', e.g.
    //   ftr_max_entries: 1
    // will set the max_entries value to 1.
    //
    // Conversion rules for a variable that is set:
    //   * option currently holds an array: only admin_credentials is
    //     overridable ("username:password"); other array options are left as-is
    //   * 'true' / 'false'  -> boolean
    //   * numeric string    -> integer (cast with (int))
    //   * anything else     -> kept as a string
    foreach ($options as $_key => &$_val) {
        $_key = "ftr_$_key";
        $_env = getenv($_key);
        if ($_env === false) {
            // Variable not set: keep the configured value.
            continue;
        }

        if (is_array($_val)) {
            if ($_key === 'ftr_admin_credentials') {
                // Split on the first colon only, so the password may itself
                // contain colons.
                $_parts = array_map('trim', explode(':', $_env, 2));
                if (count($_parts) === 2) {
                    $_val = array_combine(array('username', 'password'), $_parts);
                } else {
                    // No colon present. Check the count up front instead of
                    // relying on array_combine() returning false: on PHP 8+
                    // a length mismatch throws ValueError instead.
                    $_val = array('username'=>'admin', 'password'=>'');
                }
            }
        } elseif ($_env === 'true' || $_env === 'false') {
            $_val = ($_env === 'true');
        } elseif (is_numeric($_env)) {
            $_val = (int)$_env;
        } else { // string
            $_val = $_env;
        }
    }
    // Break the by-reference binding to the last option and remove the
    // loop temporaries from the including scope.
    unset($_key, $_val, $_env, $_parts);
}