diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
commit | 99679d06884120c57f43b44e55e03595f1f87bed (patch) | |
tree | a3f2a1aa1afdaeca1386d0c6e8a75344fd2241fb /inc/3rdparty/htmlpurifier/HTMLPurifier.php | |
parent | 655214ab30ee84884dc408488b85586f36263fcb (diff) | |
parent | d3b47e94705e17b3ba3529cbb1dc6efe69c5d2b7 (diff) | |
download | wallabag-99679d06884120c57f43b44e55e03595f1f87bed.tar.gz wallabag-99679d06884120c57f43b44e55e03595f1f87bed.tar.zst wallabag-99679d06884120c57f43b44e55e03595f1f87bed.zip |
Merge pull request #481 from wallabag/dev1.5.2
1.5.2
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier.php | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier.php b/inc/3rdparty/htmlpurifier/HTMLPurifier.php new file mode 100644 index 00000000..428a48bf --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier.php | |||
@@ -0,0 +1,292 @@ | |||
1 | <?php | ||
2 | |||
3 | /*! @mainpage | ||
4 | * | ||
5 | * HTML Purifier is an HTML filter that will take an arbitrary snippet of | ||
6 | * HTML and rigorously test, validate and filter it into a version that | ||
7 | * is safe for output onto webpages. It achieves this by: | ||
8 | * | ||
9 | * -# Lexing (parsing into tokens) the document, | ||
10 | * -# Executing various strategies on the tokens: | ||
11 | * -# Removing all elements not in the whitelist, | ||
12 | * -# Making the tokens well-formed, | ||
13 | * -# Fixing the nesting of the nodes, and | ||
14 | * -# Validating attributes of the nodes; and | ||
15 | * -# Generating HTML from the purified tokens. | ||
16 | * | ||
17 | * However, most users will only need to interface with the HTMLPurifier | ||
18 | * and HTMLPurifier_Config. | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | HTML Purifier 4.6.0 - Standards Compliant HTML Filtering | ||
23 | Copyright (C) 2006-2008 Edward Z. Yang | ||
24 | |||
25 | This library is free software; you can redistribute it and/or | ||
26 | modify it under the terms of the GNU Lesser General Public | ||
27 | License as published by the Free Software Foundation; either | ||
28 | version 2.1 of the License, or (at your option) any later version. | ||
29 | |||
30 | This library is distributed in the hope that it will be useful, | ||
31 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
32 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
33 | Lesser General Public License for more details. | ||
34 | |||
35 | You should have received a copy of the GNU Lesser General Public | ||
36 | License along with this library; if not, write to the Free Software | ||
37 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
38 | */ | ||
39 | |||
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.6.0';

    /**
     * Version of HTML Purifier.
     * Initialised from the VERSION constant so the two cannot drift apart.
     * @type string
     */
    public $version = self::VERSION;

    /**
     * Global configuration object.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility.
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier.
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * After purifyArray() this is an array of contexts keyed like the input
     * (nested arrays in the input produce nested arrays of contexts).
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
     *          for all instances of the purifier, if omitted, a default
     *          configuration is supplied (which can be overridden on a
     *          per-use basis).
     *          The parameter can also be any type that
     *          HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve
     *
     * Always raises an E_USER_WARNING because this entry point is
     * deprecated; the filter is still registered afterwards.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * The pipeline is: convert to UTF-8, run every filter's preFilter() in
     * registration order, tokenize, execute the core strategy, generate
     * HTML, run every filter's postFilter() in reverse order, and finally
     * convert back from UTF-8. The context built for this run is stored in
     * $this->context.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        // Any falsy $config (not just null) selects the instance-level config.
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters
        $filters = $this->buildFilters($config);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> list of un-purified tokens
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        // list of un-purified tokens -> list of tokens
        $tokens = $this->strategy->execute($tokens, $config, $context);
        // list of tokens -> purified HTML
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters unwind in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // reference binding kept as-is so the property semantics seen by
        // existing callers do not change
        $this->context =& $context;
        return $html;
    }

    /**
     * Collects the filters that apply to one purification run.
     *
     * Order of the returned list: filters enabled through flags in the
     * "Filter" configuration batch, then the entries of Filter.Custom,
     * then filters registered through the deprecated addFilter().
     *
     * @param HTMLPurifier_Config $config Config object for this operation
     *
     * @return HTMLPurifier_Filter[] Zero-indexed list of filter objects
     */
    private function buildFilters($config)
    {
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $name => $enabled) {
            // Skip switched-off flags and any key containing a dot; such
            // keys are presumably nested directives rather than filter
            // names (NOTE(review): confirm against the config schema).
            if (!$enabled || strpos($name, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$name";
            $filters[] = new $class();
        }
        foreach ($custom_filters as $custom_filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $custom_filter;
        }

        return array_merge($filters, $this->filters);
    }

    /**
     * Filters an array of HTML snippets
     *
     * Keys are preserved. Elements that are themselves arrays are purified
     * recursively instead of being handed to purify() as if they were
     * strings. Afterwards $this->context holds an array of contexts with
     * the same keys (and nesting) as the input.
     *
     * @param array $array_of_html Array of html snippets, possibly nested
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, same shape as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        $purified = array();
        foreach ($array_of_html as $key => $value) {
            if (is_array($value)) {
                $purified[$key] = $this->purifyArray($value, $config);
            } else {
                $purified[$key] = $this->purify($value, $config);
            }
            // after either call $this->context describes exactly this element
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $purified;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * Passing a truthy prototype always replaces the stored singleton;
     * calling without one lazily creates a default instance once.
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof HTMLPurifier) {
            self::$instance = $prototype;
        } elseif ($prototype) {
            self::$instance = new self($prototype);
        } elseif (!self::$instance) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
291 | |||
292 | // vim: et sw=4 sts=4 | ||