]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/config.php
e618117b7190da886aa40731fba02896f4521664
[github/wallabag/wallabag.git] / inc / 3rdparty / config.php
1 <?php
2 /* Full-Text RSS config */
3
4 // ......IMPORTANT......................................
5 // .....................................................
6 // Please do not change this file (config.php) directly.
7 // Save a copy as custom_config.php and make your
8 // changes to that instead. It will automatically
9 // override anything in config.php. Because config.php
10 // always gets loaded anyway, you can simply specify
11 // options you'd like to override in custom_config.php.
12 // .....................................................
13
// Bind $options to the global scope so the settings defined in this file
// land on the shared config object even if the file is included from
// inside a function.
global $options;

// Create the config object only when nothing has set it up already.
if (isset($options) === false) {
    $options = new stdClass();
}
18
19 // Enable service
20 // ----------------------
21 // Set this to false if you want to disable the service.
22 // If set to false, no feed is produced and users will
23 // be told that the service is disabled.
24 $options->enabled = true;
25
26 // Debug mode
27 // ----------------------
28 // Enable or disable debugging. When enabled debugging works by passing
29 // &debug to the makefulltextfeed.php querystring.
30 // Valid values:
31 // true or 'user' (default) - let user decide
32 // 'admin' - debug works only for logged in admin users
33 // false - disabled
34 $options->debug = true;
35
36 // Default entries (without access key)
37 // ----------------------
38 // The number of feed items to process when no API key is supplied
39 // and no &max=x value is supplied in the querystring.
40 $options->default_entries = 5;
41
42 // Max entries (without access key)
43 // ----------------------
44 // The maximum number of feed items to process when no access key is supplied.
45 // This limits the user-supplied &max=x value. For example, if the user
46 // asks for 20 items to be processed (&max=20), if max_entries is set to
47 // 10, only 10 will be processed.
48 $options->max_entries = 10;
49
50 // Rewrite relative URLs
51 // ----------------------
52 // With this enabled relative URLs found in the extracted content
53 // block are automatically rewritten as absolute URLs.
54 $options->rewrite_relative_urls = true;
55
56 // Exclude items if extraction fails
57 // ---------------------------------
58 // Excludes items from the resulting feed
59 // if we cannot extract any content from the
60 // item URL.
61 // Possible values...
62 // Enable: true
63 // Disable: false (default)
64 // User decides: 'user' (this option will appear on the form)
65 $options->exclude_items_on_fail = 'user';
66
67 // Enable multi-page support
68 // -------------------------
69 // If enabled, we will try to follow next page links on multi-page articles.
70 // Currently this only happens for sites where next_page_link has been defined
71 // in a site config file.
72 $options->multipage = true;
73
74 // Enable caching
75 // ----------------------
76 // Enable this if you'd like to cache results
77 // for 10 minutes. Cache files are written to disk (in cache/ subfolders
78 // - which must be writable).
79 // Initially it's best to keep this disabled to make sure everything works
80 // as expected. If you have APC enabled, please also see smart_cache in the
81 // advanced section.
82 $options->caching = false;
83
84 // Cache directory
85 // ----------------------
86 // Only used if caching is true
87 $options->cache_dir = dirname(__FILE__).'/cache';
88
89 // Message to prepend (without access key)
90 // ----------------------
91 // HTML to insert at the beginning of each feed item when no access key is supplied.
92 // Substitution tags:
93 // {url} - Feed item URL
94 // {effective-url} - Feed item URL after we've followed all redirects
95 $options->message_to_prepend = '';
96
97 // Message to append (without access key)
98 // ----------------------
99 // HTML to insert at the end of each feed item when no access key is supplied.
100 // Substitution tags:
101 // {url} - Feed item URL
102 // {effective-url} - Feed item URL after we've followed all redirects
103 $options->message_to_append = '';
104
105 // Error message when content extraction fails (without access key)
106 // ----------------------
107 $options->error_message = '[unable to retrieve full-text content]';
108
109 // Keep enclosure in feed items
110 // If enabled, we will try to preserve enclosures if present.
111 // ----------------------
112 $options->keep_enclosures = true;
113
114 // Detect language
115 // ---------------
116 // Should we try and find/guess the language of the article being processed?
117 // Values will be placed inside the <dc:language> element inside each <item> element
118 // Possible values:
119 // * Ignore language: 0
120 // * Use article/feed metadata (e.g. HTML lang attribute): 1 (default)
121 // * As above, but guess if not present: 2
122 // * Always guess: 3
123 // * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2)
124 $options->detect_language = 1;
125
126 // Registration key
127 // ---------------
128 // The registration key is optional. It is not required to use Full-Text RSS,
129 // and does not affect the normal operation of Full-Text RSS. It is currently
130 // only used on admin pages which help you update site patterns with the
131 // latest version offered by FiveFilters.org. For these admin-related
132 // tasks to complete, we will require a valid registration key.
133 // If you would like one, you can purchase the latest version of Full-Text RSS
134 // at http://fivefilters.org/content-only/
135 // Your registration key will automatically be sent in the confirmation email.
136 // Once you have it, simply copy and paste it here.
137 $options->registration_key = '';
138
139 /////////////////////////////////////////////////
140 /// RESTRICT ACCESS /////////////////////////////
141 /////////////////////////////////////////////////
142
143 // Admin credentials
144 // ----------------------
145 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
146 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
147 // If no password or username is set, pages requiring admin privileges will be inaccessible.
148 // The default username is 'admin'. Note that the password set below is also 'admin': change it before exposing this service.
149 // If overriding with an environment variable, separate username and password with a colon, e.g.:
150 // ftr_admin_credentials: admin:my-secret-password
151 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
152 $options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
153
154 // URLs to allow
155 // ----------------------
156 // List of URLs (or parts of a URL) which the service will accept.
157 // If the list is empty, all URLs (except those specified in the blocked list below)
158 // will be permitted.
159 // Empty: array();
160 // Non-empty example: array('example.com', 'anothersite.org');
161 $options->allowed_urls = array();
162
163 // URLs to block
164 // ----------------------
165 // List of URLs (or parts of a URL) which the service will not accept.
166 // Note: this list is ignored if allowed_urls is not empty
167 $options->blocked_urls = array();
168
169 // Key holder(s) only?
170 // ----------------------
171 // Set this to true if you want to restrict access only to
172 // those with a key (see below to specify key(s)).
173 // If set to true, no feed is produced unless a valid
174 // key is provided.
175 $options->key_required = false;
176
177 // Favour item titles in feed
178 // ----------------------
179 // By default, when processing feeds, we assume item titles in the feed
180 // have not been truncated. So after processing web pages, the extracted titles
181 // are not used in the generated feed. If you prefer to have extracted titles in
182 // the feed you can either set this to false, in which case we will always favour
183 // extracted titles. Alternatively, if set to 'user' (default) we'll use the
184 // extracted title if you pass '&use_extracted_title' in the querystring.
185 // Possible values:
186 // * Favour feed titles: true
187 // * Favour extracted titles: false
188 // * Favour feed titles with user override: 'user' (default)
189 // Note: this has no effect when the input URL is to a web page - in these cases
190 // we always use the extracted title in the generated feed.
191 $options->favour_feed_titles = 'user';
192
193 // Access keys (password protected access)
194 // ------------------------------------
195 // NOTE: You do not need an API key from fivefilters.org to run your own
196 // copy of the code. This is here if you'd like to restrict access to
197 // _your_ copy.
198 // Keys let you group users - those with a key and those without - and
199 // restrict how much of the service is available to those without a key.
200 // If you want everyone to access the service in the same way, you can
201 // leave the array below empty and ignore the access key options further down.
202 // The options further down let you control how the service should behave
203 // in each mode.
204 // Note: Explicitly including the index number (1 and 2 in the examples below)
205 // is highly recommended (when generating feeds, we encode the key and
206 // refer to it by index number and hash).
207 $options->api_keys = array();
208 // Example:
209 // $options->api_keys[1] = 'secret-key-1';
210 // $options->api_keys[2] = 'secret-key-2';
211
212 // Default entries (with access key)
213 // ----------------------
214 // The number of feed items to process when a valid access key is supplied.
215 $options->default_entries_with_key = 5;
216
217 // Max entries (with access key)
218 // ----------------------
219 // The maximum number of feed items to process when a valid access key is supplied.
220 $options->max_entries_with_key = 10;
221
222 /////////////////////////////////////////////////
223 /// ADVANCED OPTIONS ////////////////////////////
224 /////////////////////////////////////////////////
225
226 // Enable XSS filter?
227 // ----------------------
228 // We have not enabled this by default because we assume the majority of
229 // our users do not display the HTML retrieved by Full-Text RSS
230 // in a web page without further processing. If you subscribe to our generated
231 // feeds in your news reader application, it should, if it's good software, already
232 // filter the resulting HTML for XSS attacks, making it redundant for
233 // Full-Text RSS to do the same. Similarly with frameworks/CMS which display
234 // feed content - the content should be treated like any other user-submitted content.
235 //
236 // If you are writing an application yourself which is processing feeds generated by
237 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
238 // or enable this option. This might be useful if you are processing our generated
239 // feeds with JavaScript on the client side - although there's client side xss
240 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
241 //
242 // If enabled, we'll pass retrieved HTML content through htmLawed with
243 // safe flag on and style attributes denied, see
244 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
245 // Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.
246 //
247 // Valid values:
248 // true - enabled, all content will be filtered
249 // 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable
250 // false - disabled
251 $options->xss_filter = 'user';
252
253 // Allowed parsers
254 // ----------------------
255 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
256 // While fast, on some sites it may not always produce good results.
257 // For these sites, you can specify an alternative HTML parser:
258 // parser: html5lib
259 // The html5lib parser is bundled with Full-Text RSS.
260 // see http://code.google.com/p/html5lib/
261 //
262 // To disable HTML parsing with html5lib, you can remove it from this list.
263 // By default we allow both: libxml and html5lib.
264 $options->allowed_parsers = array('libxml', 'html5lib');
265 //$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases
266
267 // Enable Cross-Origin Resource Sharing (CORS)
268 // ----------------------
269 // If enabled we'll send the following HTTP header
270 // Access-Control-Allow-Origin: *
271 // see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing
272 $options->cors = false;
273
274 // Use APC user cache?
275 // ----------------------
276 // If enabled we will store site config files (when requested
277 // for the first time) in APC's user cache. Keys prefixed with 'sc.'
278 // This improves performance by reducing disk access.
279 // Note: this has no effect if APC is unavailable on your server.
280 $options->apc = true;
281
282 // Smart cache (experimental)
283 // ----------------------
284 // With this option enabled we will not cache to disk immediately.
285 // We will store the cache key in APC and if it's requested again
286 // we will cache results to disk. Keys prefixed with 'cache.'
287 // This improves performance by reducing disk access.
288 // Note: this has no effect if APC is disabled or unavailable on your server,
289 // or if you have caching disabled.
290 $options->smart_cache = true;
291
292 // Fingerprints
293 // ----------------------
294 // key is fingerprint (fragment to find in HTML)
295 // value is host name to use for site config lookup if fingerprint matches
296 $options->fingerprints = array(
297 // Posterous
298 '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
299 // Blogger
300 '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
301 '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
302 // WordPress (hosted)
303 // '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true),
304 // WordPress (self-hosted and hosted)
305 '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
306 );
307
308 // User Agent strings - mapping domain names
309 // ----------------------
310 // e.g. $options->user_agents = array('example.org' => 'PHP/5.2');
311 $options->user_agents = array( 'lifehacker.com' => 'PHP/5.2',
312 'gawker.com' => 'PHP/5.2',
313 'deadspin.com' => 'PHP/5.2',
314 'kotaku.com' => 'PHP/5.2',
315 'jezebel.com' => 'PHP/5.2',
316 'io9.com' => 'PHP/5.2',
317 'jalopnik.com' => 'PHP/5.2',
318 'gizmodo.com' => 'PHP/5.2',
319 '.wikipedia.org' => 'Mozilla/5.2',
320 '.fok.nl' => 'Googlebot/2.1',
321 'getpocket.com' => 'PHP/5.2'
322 );
323
324 // URL Rewriting
325 // ----------------------
326 // Currently allows simple string replace of URLs.
327 // Useful for rewriting certain URLs to point to a single page
328 // or HTML view. Although using the single_page_link site config
329 // instruction is the preferred way to do this, sometimes, as
330 // with Google Docs URLs, it's not possible.
331 // Note: this might move to the site config file at some point.
332 $options->rewrite_url = array(
333 // Rewrite public Google Docs URLs to point to HTML view:
334 // if a URL contains docs.google.com, replace /Doc? with /View?
335 'docs.google.com' => array('/Doc?' => '/View?'),
336 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
337 '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'),
338 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com')
339 );
340
341 // Content-Type exceptions
342 // -----------------------
343 // Here you can define different actions based
344 // on the Content-Type header returned by server.
345 // MIME type as key, action as value.
346 // Valid actions:
347 // * 'exclude' - exclude this item from the result
348 // * 'link' - create HTML link to the item
349 $options->content_type_exc = array(
350 'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
351 'image' => array('action'=>'link', 'name'=>'Image'),
352 'audio' => array('action'=>'link', 'name'=>'Audio'),
353 'video' => array('action'=>'link', 'name'=>'Video')
354 );
355
356 // Cache directory level
357 // ----------------------
358 // Spread cache files over different directories (only used if caching is enabled).
359 // Used to prevent large number of files in one directory.
360 // This corresponds to Zend_Cache's hashed_directory_level
361 // see http://framework.zend.com/manual/en/zend.cache.backends.html
362 // It's best not to change this if you're unsure.
363 $options->cache_directory_level = 0;
364
365 // Cache cleanup
366 // -------------
367 // 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup)
368 // 1 = clean cache every time the script runs (not recommended)
369 // 100 = clean cache roughly once every 100 script runs
370 // x = clean cache roughly once every x script runs
371 // ...you get the idea :)
372 $options->cache_cleanup = 100;
373
374 /////////////////////////////////////////////////
375 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
376 /////////////////////////////////////////////////
377
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');

// Guarded declaration: custom_config.php is meant to be a copy of this file,
// so the same declaration can be loaded a second time via the require below.
if (!function_exists('ftr_config_env_value')) {
    /**
     * Convert the raw string of an 'ftr_<option>' environment variable into
     * the value that should be stored on the config object.
     *
     * Rules:
     *  - array options cannot be expressed as one string and are returned
     *    unchanged, except admin_credentials, which is parsed from
     *    'username:password' (split on the first colon, both parts trimmed);
     *    a value without a colon yields username 'admin' and an empty password
     *  - 'true' / 'false' become booleans
     *  - numeric strings are cast to int (so '1.5' becomes 1)
     *  - anything else is kept as a string
     *
     * @param string $name    Option name without the 'ftr_' prefix, e.g. 'max_entries'.
     * @param mixed  $current Value the option currently holds.
     * @param string $env     Raw value of the environment variable.
     * @return mixed Value to assign to the option.
     */
    function ftr_config_env_value($name, $current, $env)
    {
        if (is_array($current)) {
            if ($name !== 'admin_credentials') {
                return $current;
            }
            $parts = array_map('trim', explode(':', $env, 2));
            // Check the count ourselves instead of relying on array_combine()
            // returning false: since PHP 8 it throws ValueError on a length
            // mismatch, which made a colon-less value fatal.
            if (count($parts) !== 2) {
                return array('username'=>'admin', 'password'=>'');
            }
            return array('username'=>$parts[0], 'password'=>$parts[1]);
        }
        if ($env === 'true' || $env === 'false') {
            return ($env === 'true');
        }
        if (is_numeric($env)) {
            return (int)$env;
        }
        return $env;
    }
}

// Only the real config.php performs the steps below; a copy saved under
// another name (e.g. custom_config.php) skips them.
if (basename(__FILE__) === 'config.php') {
    if (file_exists(dirname(__FILE__).'/custom_config.php')) {
        require_once dirname(__FILE__).'/custom_config.php';
    }

    // check for environment variables - often used on cloud platforms
    // environment variables should be prefixed with 'ftr_', e.g.
    // ftr_max_entries: 1
    // will set the max_entries value to 1.
    foreach (get_object_vars($options) as $_name => $_current) {
        $_env = getenv("ftr_$_name");
        if ($_env !== false) {
            $options->$_name = ftr_config_env_value($_name, $_current, $_env);
        }
    }
    // Do not leak the loop temporaries into the including scope.
    unset($_name, $_current, $_env);
}