]>
Commit | Line | Data |
---|---|---|
d4949327 NL |
1 | <?php\r |
2 | \r | |
3 | /*! @mainpage\r | |
4 | *\r | |
5 | * HTML Purifier is an HTML filter that will take an arbitrary snippet of\r | |
6 | * HTML and rigorously test, validate and filter it into a version that\r | |
7 | * is safe for output onto webpages. It achieves this by:\r | |
8 | *\r | |
9 | * -# Lexing (parsing into tokens) the document,\r | |
10 | * -# Executing various strategies on the tokens:\r | |
11 | * -# Removing all elements not in the whitelist,\r | |
12 | * -# Making the tokens well-formed,\r | |
13 | * -# Fixing the nesting of the nodes, and\r | |
14 | * -# Validating attributes of the nodes; and\r | |
15 | * -# Generating HTML from the purified tokens.\r | |
16 | *\r | |
17 | * However, most users will only need to interface with the HTMLPurifier\r | |
18 | * and HTMLPurifier_Config.\r | |
19 | */\r | |
20 | \r | |
21 | /*\r | |
22 | HTML Purifier 4.6.0 - Standards Compliant HTML Filtering\r | |
23 | Copyright (C) 2006-2008 Edward Z. Yang\r | |
24 | \r | |
25 | This library is free software; you can redistribute it and/or\r | |
26 | modify it under the terms of the GNU Lesser General Public\r | |
27 | License as published by the Free Software Foundation; either\r | |
28 | version 2.1 of the License, or (at your option) any later version.\r | |
29 | \r | |
30 | This library is distributed in the hope that it will be useful,\r | |
31 | but WITHOUT ANY WARRANTY; without even the implied warranty of\r | |
32 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r | |
33 | Lesser General Public License for more details.\r | |
34 | \r | |
35 | You should have received a copy of the GNU Lesser General Public\r | |
36 | License along with this library; if not, write to the Free Software\r | |
37 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r | |
38 | */\r | |
39 | \r | |
40 | /**\r | |
41 | * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.\r | |
42 | *\r | |
43 | * @note There are several points in which configuration can be specified\r | |
44 | * for HTML Purifier. The precedence of these (from lowest to\r | |
45 | * highest) is as follows:\r | |
46 | * -# Instance: new HTMLPurifier($config)\r | |
47 | * -# Invocation: purify($html, $config)\r | |
48 | * These configurations are entirely independent of each other and\r | |
49 | * are *not* merged (this behavior may change in the future).\r | |
50 | *\r | |
51 | * @todo We need an easier way to inject strategies using the configuration\r | |
52 | * object.\r | |
53 | */\r | |
class HTMLPurifier
{

    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.6.0';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.6.0';

    /**
     * Global configuration object, used by purify() whenever no
     * per-call configuration is passed.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility. Populated by addFilter().
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier, managed by instance().
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator created by the most recent purify() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * After purifyArray() this is an array of contexts keyed like the
     * input array (nested input arrays yield nested arrays of contexts).
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
     *          for all instances of the purifier, if omitted, a default
     *          configuration is supplied (which can be overridden on a
     *          per-use basis).
     *          The parameter can also be any type that
     *          HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve.
     *
     * Deprecated: every call raises an E_USER_WARNING before the filter
     * is appended to the list of legacy filters.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters
        $filters = $this->buildFilters($config);
        // maybe prepare(), but later

        // pre-filters run in registration order
        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters run in the reverse of registration order
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Assembles the ordered list of filter objects for one purify() run.
     *
     * Order: filters enabled through flags in the Filter namespace, then
     * the entries of Filter.Custom, then filters added via addFilter().
     *
     * @param HTMLPurifier_Config $config Config object of the current run
     *
     * @return HTMLPurifier_Filter[]
     */
    private function buildFilters($config)
    {
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $name => $enabled) {
            // skip disabled flags and keys that contain a dot
            if (!$enabled || strpos($name, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$name";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        return array_merge($filters, $this->filters);
    }

    /**
     * Filters an array of HTML snippets.
     *
     * Values that are themselves arrays are purified recursively, so the
     * returned array mirrors the key structure of the input.
     *
     * @param array $array_of_html Array of html snippets (string values,
     *                or nested arrays of them)
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, keyed like the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        $purified = array();
        foreach ($array_of_html as $key => $value) {
            if (is_array($value)) {
                $purified[$key] = $this->purifyArray($value, $config);
            } else {
                $purified[$key] = $this->purify($value, $config);
            }
            // holds one context for a snippet, or an array of contexts
            // when the value was a nested array
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $purified;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof HTMLPurifier) {
            // an explicit purifier replaces the stored singleton
            self::$instance = $prototype;
        } elseif ($prototype) {
            // any other truthy prototype is handed to the constructor as config
            self::$instance = new HTMLPurifier($prototype);
        } elseif (!self::$instance) {
            // nothing supplied and nothing stored yet: create a default purifier
            self::$instance = new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
291 | \r | |
292 | // vim: et sw=4 sts=4\r |