]>
Commit | Line | Data |
---|---|---|
1 | <?php\r | |
2 | /* Full-Text RSS config */\r | |
3 | \r | |
4 | // ......IMPORTANT......................................\r | |
5 | // .....................................................\r | |
6 | // Please do not change this file (config.php) directly.\r | |
7 | // Save a copy as custom_config.php and make your\r | |
8 | // changes to that instead. It will automatically\r | |
9 | // override anything in config.php. Because config.php\r | |
10 | // always gets loaded anyway, you can simply specify\r | |
11 | // options you'd like to override in custom_config.php.\r | |
12 | // .....................................................\r | |
13 | \r | |
14 | global $options;\r | |
15 | \r | |
16 | // Create config object\r | |
17 | if (!isset($options)) $options = new stdClass();\r | |
18 | \r | |
19 | // Enable service\r | |
20 | // ----------------------\r | |
21 | // Set this to false if you want to disable the service.\r | |
22 | // If set to false, no feed is produced and users will \r | |
23 | // be told that the service is disabled.\r | |
24 | $options->enabled = true;\r | |
25 | \r | |
26 | // Debug mode\r | |
27 | // ----------------------\r | |
28 | // Enable or disable debugging. When enabled debugging works by passing\r | |
29 | // &debug to the makefulltextfeed.php querystring.\r | |
30 | // Valid values:\r | |
31 | // true or 'user' (default) - let user decide\r | |
32 | // 'admin' - debug works only for logged in admin users\r | |
33 | // false - disabled\r | |
34 | $options->debug = true;\r | |
35 | \r | |
36 | // Default entries (without access key)\r | |
37 | // ----------------------\r | |
38 | // The number of feed items to process when no API key is supplied\r | |
39 | // and no &max=x value is supplied in the querystring.\r | |
40 | $options->default_entries = 5;\r | |
41 | \r | |
42 | // Max entries (without access key)\r | |
43 | // ----------------------\r | |
44 | // The maximum number of feed items to process when no access key is supplied.\r | |
45 | // This limits the user-supplied &max=x value. For example, if the user\r | |
46 | // asks for 20 items to be processed (&max=20), if max_entries is set to \r | |
47 | // 10, only 10 will be processed.\r | |
48 | $options->max_entries = 10;\r | |
49 | \r | |
50 | // Rewrite relative URLs\r | |
51 | // ----------------------\r | |
52 | // With this enabled relative URLs found in the extracted content\r | |
53 | // block are automatically rewritten as absolute URLs.\r | |
54 | $options->rewrite_relative_urls = true;\r | |
55 | \r | |
56 | // Exclude items if extraction fails\r | |
57 | // ---------------------------------\r | |
58 | // Excludes items from the resulting feed\r | |
59 | // if we cannot extract any content from the\r | |
60 | // item URL.\r | |
61 | // Possible values...\r | |
62 | // Enable: true\r | |
63 | // Disable: false (default)\r | |
64 | // User decides: 'user' (this option will appear on the form)\r | |
65 | $options->exclude_items_on_fail = 'user';\r | |
66 | \r | |
67 | // Enable multi-page support\r | |
68 | // -------------------------\r | |
69 | // If enabled, we will try to follow next page links on multi-page articles.\r | |
70 | // Currently this only happens for sites where next_page_link has been defined \r | |
71 | // in a site config file.\r | |
72 | $options->multipage = true;\r | |
73 | \r | |
74 | // Enable caching\r | |
75 | // ----------------------\r | |
76 | // Enable this if you'd like to cache results\r | |
77 | // for 10 minutes. Cache files are written to disk (in cache/ subfolders\r | |
78 | // - which must be writable).\r | |
79 | // Initially it's best to keep this disabled to make sure everything works\r | |
80 | // as expected. If you have APC enabled, please also see smart_cache in the\r | |
81 | // advanced section.\r | |
82 | $options->caching = false;\r | |
83 | \r | |
84 | // Cache directory\r | |
85 | // ----------------------\r | |
86 | // Only used if caching is true\r | |
87 | $options->cache_dir = dirname(__FILE__).'/cache';\r | |
88 | \r | |
89 | // Message to prepend (without access key)\r | |
90 | // ----------------------\r | |
91 | // HTML to insert at the beginning of each feed item when no access key is supplied.\r | |
92 | // Substitution tags:\r | |
93 | // {url} - Feed item URL\r | |
94 | // {effective-url} - Feed item URL after we've followed all redirects\r | |
95 | $options->message_to_prepend = '';\r | |
96 | \r | |
97 | // Message to append (without access key)\r | |
98 | // ----------------------\r | |
99 | // HTML to insert at the end of each feed item when no access key is supplied.\r | |
100 | // Substitution tags:\r | |
101 | // {url} - Feed item URL\r | |
102 | // {effective-url} - Feed item URL after we've followed all redirects\r | |
103 | $options->message_to_append = '';\r | |
104 | \r | |
105 | // Error message when content extraction fails (without access key)\r | |
106 | // ----------------------\r | |
107 | $options->error_message = '[unable to retrieve full-text content]';\r | |
108 | \r | |
109 | // Keep enclosure in feed items\r | |
110 | // If enabled, we will try to preserve enclosures if present.\r | |
111 | // ----------------------\r | |
112 | $options->keep_enclosures = true;\r | |
113 | \r | |
114 | // Detect language\r | |
115 | // ---------------\r | |
116 | // Should we try and find/guess the language of the article being processed?\r | |
117 | // Values will be placed inside the <dc:language> element inside each <item> element\r | |
118 | // Possible values:\r | |
119 | // * Ignore language: 0\r | |
120 | // * Use article/feed metadata (e.g. HTML lang attribute): 1 (default)\r | |
121 | // * As above, but guess if not present: 2\r | |
122 | // * Always guess: 3\r | |
123 | // * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2)\r | |
124 | $options->detect_language = 1;\r | |
125 | \r | |
126 | // Registration key\r | |
127 | // ---------------\r | |
128 | // The registration key is optional. It is not required to use Full-Text RSS, \r | |
129 | // and does not affect the normal operation of Full-Text RSS. It is currently \r | |
130 | // only used on admin pages which help you update site patterns with the \r | |
131 | // latest version offered by FiveFilters.org. For these admin-related \r | |
132 | // tasks to complete, we will require a valid registration key.\r | |
133 | // If you would like one, you can purchase the latest version of Full-Text RSS\r | |
134 | // at http://fivefilters.org/content-only/\r | |
135 | // Your registration key will automatically be sent in the confirmation email.\r | |
136 | // Once you have it, simply copy and paste it here.\r | |
137 | $options->registration_key = '';\r | |
138 | \r | |
139 | /////////////////////////////////////////////////\r | |
140 | /// RESTRICT ACCESS /////////////////////////////\r | |
141 | /////////////////////////////////////////////////\r | |
142 | \r | |
143 | // Admin credentials\r | |
144 | // ----------------------\r | |
145 | // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.\r | |
146 | // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.\r | |
147 | // If no password or username is set, pages requiring admin privileges will be inaccessible. \r | |
148 | // The default username is 'admin'. The password set below is also 'admin' - change it before deploying.\r | |
149 | // If overriding with an environment variable, separate username and password with a colon, e.g.:\r | |
150 | // ftr_admin_credentials: admin:my-secret-password\r | |
151 | // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');\r | |
152 | $options->admin_credentials = array('username'=>'admin', 'password'=>'admin');\r | |
153 | \r | |
154 | // URLs to allow\r | |
155 | // ----------------------\r | |
156 | // List of URLs (or parts of a URL) which the service will accept.\r | |
157 | // If the list is empty, all URLs (except those specified in the blocked list below)\r | |
158 | // will be permitted.\r | |
159 | // Empty: array();\r | |
160 | // Non-empty example: array('example.com', 'anothersite.org');\r | |
161 | $options->allowed_urls = array();\r | |
162 | \r | |
163 | // URLs to block\r | |
164 | // ----------------------\r | |
165 | // List of URLs (or parts of a URL) which the service will not accept.\r | |
166 | // Note: this list is ignored if allowed_urls is not empty\r | |
167 | $options->blocked_urls = array();\r | |
168 | \r | |
169 | // Key holder(s) only?\r | |
170 | // ----------------------\r | |
171 | // Set this to true if you want to restrict access only to\r | |
172 | // those with a key (see below to specify key(s)).\r | |
173 | // If set to true, no feed is produced unless a valid\r | |
174 | // key is provided.\r | |
175 | $options->key_required = false;\r | |
176 | \r | |
177 | // Favour item titles in feed\r | |
178 | // ----------------------\r | |
179 | // By default, when processing feeds, we assume item titles in the feed\r | |
180 | // have not been truncated. So after processing web pages, the extracted titles\r | |
181 | // are not used in the generated feed. If you prefer to have extracted titles in \r | |
182 | // the feed you can either set this to false, in which case we will always favour \r | |
183 | // extracted titles. Alternatively, if set to 'user' (default) we'll use the \r | |
184 | // extracted title if you pass '&use_extracted_title' in the querystring.\r | |
185 | // Possible values:\r | |
186 | // * Favour feed titles: true \r | |
187 | // * Favour extracted titles: false\r | |
188 | // * Favour feed titles with user override: 'user' (default)\r | |
189 | // Note: this has no effect when the input URL is to a web page - in these cases\r | |
190 | // we always use the extracted title in the generated feed.\r | |
191 | $options->favour_feed_titles = 'user';\r | |
192 | \r | |
193 | // Access keys (password protected access)\r | |
194 | // ------------------------------------\r | |
195 | // NOTE: You do not need an API key from fivefilters.org to run your own \r | |
196 | // copy of the code. This is here if you'd like to restrict access to\r | |
197 | // _your_ copy.\r | |
198 | // Keys let you group users - those with a key and those without - and\r | |
199 | // restrict how much of the service those without a key can use.\r | |
200 | // If you want everyone to access the service in the same way, you can\r | |
201 | // leave the array below empty and ignore the access key options further down.\r | |
202 | // The options further down let you control how the service should behave \r | |
203 | // in each mode.\r | |
204 | // Note: Explicitly including the index number (1 and 2 in the examples below) \r | |
205 | // is highly recommended (when generating feeds, we encode the key and \r | |
206 | // refer to it by index number and hash).\r | |
207 | $options->api_keys = array();\r | |
208 | // Example:\r | |
209 | // $options->api_keys[1] = 'secret-key-1';\r | |
210 | // $options->api_keys[2] = 'secret-key-2';\r | |
211 | \r | |
212 | // Default entries (with access key)\r | |
213 | // ----------------------\r | |
214 | // The number of feed items to process when a valid access key is supplied.\r | |
215 | $options->default_entries_with_key = 5;\r | |
216 | \r | |
217 | // Max entries (with access key)\r | |
218 | // ----------------------\r | |
219 | // The maximum number of feed items to process when a valid access key is supplied.\r | |
220 | $options->max_entries_with_key = 10;\r | |
221 | \r | |
222 | /////////////////////////////////////////////////\r | |
223 | /// ADVANCED OPTIONS ////////////////////////////\r | |
224 | /////////////////////////////////////////////////\r | |
225 | \r | |
226 | // Enable XSS filter?\r | |
227 | // ----------------------\r | |
228 | // We have not enabled this by default because we assume the majority of\r | |
229 | // our users do not display the HTML retrieved by Full-Text RSS\r | |
230 | // in a web page without further processing. If you subscribe to our generated\r | |
231 | // feeds in your news reader application, it should, if it's good software, already\r | |
232 | // filter the resulting HTML for XSS attacks, making it redundant for\r | |
233 | // Full-Text RSS to do the same. Similarly with frameworks/CMS which display\r | |
234 | // feed content - the content should be treated like any other user-submitted content.\r | |
235 | // \r | |
236 | // If you are writing an application yourself which is processing feeds generated by\r | |
237 | // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks\r | |
238 | // or enable this option. This might be useful if you are processing our generated\r | |
239 | // feeds with JavaScript on the client side - although there's client side xss\r | |
240 | // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer\r | |
241 | // \r | |
242 | // If enabled, we'll pass retrieved HTML content through htmLawed with\r | |
243 | // safe flag on and style attributes denied, see\r | |
244 | // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6\r | |
245 | // Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.\r | |
246 | //\r | |
247 | // Valid values:\r | |
248 | // true - enabled, all content will be filtered\r | |
249 | // 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable\r | |
250 | // false - disabled\r | |
251 | $options->xss_filter = 'user';\r | |
252 | \r | |
253 | // Allowed parsers\r | |
254 | // ----------------------\r | |
255 | // Full-Text RSS attempts to use PHP's libxml extension to process HTML.\r | |
256 | // While fast, on some sites it may not always produce good results. \r | |
257 | // For these sites, you can specify an alternative HTML parser: \r | |
258 | // parser: html5lib\r | |
259 | // The html5lib parser is bundled with Full-Text RSS.\r | |
260 | // see http://code.google.com/p/html5lib/\r | |
261 | //\r | |
262 | // To disable HTML parsing with html5lib, you can remove it from this list.\r | |
263 | // By default we allow both: libxml and html5lib.\r | |
264 | $options->allowed_parsers = array('libxml', 'html5lib');\r | |
265 | //$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases\r | |
266 | \r | |
267 | // Enable Cross-Origin Resource Sharing (CORS)\r | |
268 | // ----------------------\r | |
269 | // If enabled we'll send the following HTTP header\r | |
270 | // Access-Control-Allow-Origin: *\r | |
271 | // see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing\r | |
272 | $options->cors = false;\r | |
273 | \r | |
274 | // Use APC user cache?\r | |
275 | // ----------------------\r | |
276 | // If enabled we will store site config files (when requested \r | |
277 | // for the first time) in APC's user cache. Keys prefixed with 'sc.'\r | |
278 | // This improves performance by reducing disk access.\r | |
279 | // Note: this has no effect if APC is unavailable on your server.\r | |
280 | $options->apc = true;\r | |
281 | \r | |
282 | // Smart cache (experimental)\r | |
283 | // ----------------------\r | |
284 | // With this option enabled we will not cache to disk immediately.\r | |
285 | // We will store the cache key in APC and if it's requested again\r | |
286 | // we will cache results to disk. Keys prefixed with 'cache.'\r | |
287 | // This improves performance by reducing disk access.\r | |
288 | // Note: this has no effect if APC is disabled or unavailable on your server,\r | |
289 | // or if you have caching disabled.\r | |
290 | $options->smart_cache = true;\r | |
291 | \r | |
292 | // Fingerprints\r | |
293 | // ----------------------\r | |
294 | // key is fingerprint (fragment to find in HTML)\r | |
295 | // value is host name to use for site config lookup if fingerprint matches\r | |
296 | $options->fingerprints = array(\r | |
297 | // Posterous\r | |
298 | '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),\r | |
299 | // Blogger\r | |
300 | '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),\r | |
301 | '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),\r | |
302 | // WordPress (hosted)\r | |
303 | // '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true),\r | |
304 | // WordPress (self-hosted and hosted)\r | |
305 | '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)\r | |
306 | );\r | |
307 | \r | |
308 | // User Agent strings - mapping domain names\r | |
309 | // ----------------------\r | |
310 | // e.g. $options->user_agents = array('example.org' => 'PHP/5.2');\r | |
311 | $options->user_agents = array( 'lifehacker.com' => 'PHP/5.2',\r | |
312 | 'gawker.com' => 'PHP/5.2',\r | |
313 | 'deadspin.com' => 'PHP/5.2',\r | |
314 | 'kotaku.com' => 'PHP/5.2',\r | |
315 | 'jezebel.com' => 'PHP/5.2',\r | |
316 | 'io9.com' => 'PHP/5.2',\r | |
317 | 'jalopnik.com' => 'PHP/5.2',\r | |
318 | 'gizmodo.com' => 'PHP/5.2',\r | |
319 | '.wikipedia.org' => 'Mozilla/5.2',\r | |
320 | '.fok.nl' => 'Googlebot/2.1',\r | |
321 | 'getpocket.com' => 'PHP/5.2'\r | |
322 | );\r | |
323 | \r | |
324 | // URL Rewriting\r | |
325 | // ----------------------\r | |
326 | // Currently allows simple string replace of URLs.\r | |
327 | // Useful for rewriting certain URLs to point to a single page\r | |
328 | // or HTML view. Although using the single_page_link site config\r | |
329 | // instruction is the preferred way to do this, sometimes, as\r | |
330 | // with Google Docs URLs, it's not possible.\r | |
331 | // Note: this might move to the site config file at some point.\r | |
332 | $options->rewrite_url = array(\r | |
333 | // Rewrite public Google Docs URLs to point to HTML view:\r | |
334 | // if a URL contains docs.google.com, replace /Doc? with /View?\r | |
335 | 'docs.google.com' => array('/Doc?' => '/View?'),\r | |
336 | 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),\r | |
337 | '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'),\r | |
338 | 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com')\r | |
339 | );\r | |
340 | \r | |
341 | // Content-Type exceptions\r | |
342 | // -----------------------\r | |
343 | // Here you can define different actions based\r | |
344 | // on the Content-Type header returned by server.\r | |
345 | // MIME type as key, action as value.\r | |
346 | // Valid actions:\r | |
347 | // * 'exclude' - exclude this item from the result\r | |
348 | // * 'link' - create HTML link to the item\r | |
349 | $options->content_type_exc = array( \r | |
350 | 'application/pdf' => array('action'=>'link', 'name'=>'PDF'),\r | |
351 | 'image' => array('action'=>'link', 'name'=>'Image'),\r | |
352 | 'audio' => array('action'=>'link', 'name'=>'Audio'),\r | |
353 | 'video' => array('action'=>'link', 'name'=>'Video')\r | |
354 | );\r | |
355 | \r | |
356 | // Cache directory level\r | |
357 | // ----------------------\r | |
358 | // Spread cache files over different directories (only used if caching is enabled).\r | |
359 | // Used to prevent large number of files in one directory.\r | |
360 | // This corresponds to Zend_Cache's hashed_directory_level\r | |
361 | // see http://framework.zend.com/manual/en/zend.cache.backends.html\r | |
362 | // It's best not to change this if you're unsure.\r | |
363 | $options->cache_directory_level = 0;\r | |
364 | \r | |
365 | // Cache cleanup\r | |
366 | // -------------\r | |
367 | // 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup)\r | |
368 | // 1 = clean cache every time the script runs (not recommended)\r | |
369 | // 100 = clean cache roughly once every 100 script runs\r | |
370 | // x = clean cache roughly once every x script runs\r | |
371 | // ...you get the idea :)\r | |
372 | $options->cache_cleanup = 100;\r | |
373 | \r | |
374 | /////////////////////////////////////////////////\r | |
375 | /// DO NOT CHANGE ANYTHING BELOW THIS ///////////\r | |
376 | /////////////////////////////////////////////////\r | |
377 | \r | |
378 | if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');\r | |
379 | \r | |
// Helper for the ftr_admin_credentials environment override below.
// Guarded with function_exists() because custom_config.php is meant to be a
// copy of this file and would otherwise declare the same function twice.
if (!function_exists('ftr_parse_admin_credentials')) {
    /**
     * Parse an admin-credentials override of the form "username:password".
     *
     * Only the first colon separates the two parts, so the password itself
     * may contain colons. Both parts are trimmed of surrounding whitespace.
     *
     * @param string $raw Raw value of the ftr_admin_credentials environment variable.
     * @return array Array with 'username' and 'password' keys. When $raw
     *               contains no colon, username 'admin' with an empty
     *               password is returned.
     */
    function ftr_parse_admin_credentials($raw)
    {
        $parts = array_map('trim', explode(':', (string)$raw, 2));
        // Count the parts here instead of passing a short list to array_combine():
        // a key/value length mismatch raises ValueError on PHP 8 (and a warning
        // on older versions), so a "=== false" check afterwards is not reliable.
        if (count($parts) !== 2) {
            return array('username'=>'admin', 'password'=>'');
        }
        return array('username'=>$parts[0], 'password'=>$parts[1]);
    }
}

// Only run the override logic from config.php itself, not from a copy of this
// file saved under another name (e.g. custom_config.php).
if (basename(__FILE__) == 'config.php') {
    // Values set in custom_config.php replace the defaults defined above.
    if (file_exists(dirname(__FILE__).'/custom_config.php')) {
        require_once dirname(__FILE__).'/custom_config.php';
    }

    // check for environment variables - often used on cloud platforms
    // environment variables should be prefixed with 'ftr_', e.g.
    // ftr_max_entries: 1
    // will set the max_entries value to 1.
    foreach ($options as $_key=>&$_val) {
        $_key = "ftr_$_key";
        $_env = getenv($_key);
        if ($_env === false) {
            // no environment variable for this option - keep the configured value
            continue;
        }
        if (is_array($_val)) {
            // admin_credentials is the only array option that can be overridden
            // from the environment; other array options are left untouched.
            if ($_key === 'ftr_admin_credentials') {
                $_val = ftr_parse_admin_credentials($_env);
            }
        } elseif ($_env === 'true' || $_env === 'false') {
            // literal 'true'/'false' strings become booleans
            $_val = ($_env === 'true');
        } elseif (is_numeric($_env)) {
            // numeric strings are cast to int (any fractional part is dropped)
            $_val = (int)$_env;
        } else { // string
            $_val = $_env;
        }
    }
    // $_val is a reference into $options: unset it (and the other temporaries)
    // so later code cannot modify the last option by accident.
    unset($_key, $_val, $_env);
}