diff options
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy')
6 files changed, 1080 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Composite.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Composite.php new file mode 100644 index 00000000..9de812df --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Composite.php | |||
@@ -0,0 +1,30 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Composite strategy that runs multiple strategies on tokens. | ||
5 | */ | ||
6 | abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy | ||
7 | { | ||
8 | /** | ||
9 | * Strategies applied by execute(), in array order. | ||
10 | * @type HTMLPurifier_Strategy[] | ||
11 | */ | ||
12 | protected $strategies = array(); | ||
13 | |||
14 | /** | ||
15 | * Pipes the tokens through every registered strategy, each one receiving the previous one's output. | ||
16 | * @param HTMLPurifier_Token[] $tokens Tokens handed to the first strategy | ||
17 | * @param HTMLPurifier_Config $config Forwarded unchanged to every strategy | ||
18 | * @param HTMLPurifier_Context $context Forwarded unchanged to every strategy | ||
19 | * @return HTMLPurifier_Token[] Output of the last strategy ($tokens itself when none are registered) | ||
20 | */ | ||
21 | public function execute($tokens, $config, $context) | ||
22 | { | ||
23 | foreach ($this->strategies as $stage) { | ||
24 | $tokens = $stage->execute($tokens, $config, $context); | ||
25 | } | ||
26 | return $tokens; | ||
27 | } | ||
28 | } | ||
29 | |||
30 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Core.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Core.php new file mode 100644 index 00000000..07752152 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/Core.php | |||
@@ -0,0 +1,17 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Core strategy composed of the big four strategies. | ||
5 | */ | ||
6 | class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite | ||
7 | { | ||
8 | public function __construct() | ||
9 | { | ||
10 | // registration order: array_push() appends its arguments left to right | ||
11 | array_push($this->strategies, new HTMLPurifier_Strategy_RemoveForeignElements(), | ||
12 | new HTMLPurifier_Strategy_MakeWellFormed(), new HTMLPurifier_Strategy_FixNesting(), | ||
13 | new HTMLPurifier_Strategy_ValidateAttributes()); | ||
14 | } | ||
15 | } | ||
16 | |||
17 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php new file mode 100644 index 00000000..8b1eb20f --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php | |||
@@ -0,0 +1,181 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Takes a well formed list of tokens and fixes their nesting. | ||
5 | * | ||
6 | * HTML elements dictate which elements are allowed to be their children, | ||
7 | * for example, you can't have a p tag in a span tag. Other elements have | ||
8 | * much more rigorous definitions: tables, for instance, require a specific | ||
9 | * order for their elements. There are also constraints not expressible by | ||
10 | * document type definitions, such as the chameleon nature of ins/del | ||
11 | * tags and global child exclusions. | ||
12 | * | ||
13 | * The first major objective of this strategy is to iterate through all | ||
14 | * the nodes and determine whether or not their children conform to the | ||
15 | * element's definition. If they do not, the child definition may | ||
16 | * optionally supply an amended list of elements that is valid or | ||
17 | * require that the entire node be deleted (and the previous node | ||
18 | * rescanned). | ||
19 | * | ||
20 | * The second objective is to ensure that explicitly excluded elements of | ||
21 | * an element do not appear in its children. Code that accomplishes this | ||
22 | * task is pervasive through the strategy, though the two are distinct tasks | ||
23 | * and could, theoretically, be separated (although it's not recommended). | ||
24 | * | ||
25 | * @note Whether or not unrecognized children are silently dropped or | ||
26 | * translated into text depends on the child definitions. | ||
27 | * | ||
28 | * @todo Enable nodes to be bubbled out of the structure. This is | ||
29 | * easier with our new algorithm. | ||
30 | */ | ||
31 | |||
32 | class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy | ||
33 | { | ||
34 | |||
35 | /** | ||
36 | * @param HTMLPurifier_Token[] $tokens Token list, converted to a node tree before validation | ||
37 | * @param HTMLPurifier_Config $config Supplies the HTML definition and Core.DisableExcludes | ||
38 | * @param HTMLPurifier_Context $context Receives IsInline, CurrentNode and CurrentToken while this runs | ||
39 | * @return array|HTMLPurifier_Token[] Result of flattening the validated tree back into tokens | ||
40 | */ | ||
41 | public function execute($tokens, $config, $context) | ||
42 | { | ||
43 | |||
44 | //####################################################################// | ||
45 | // Pre-processing | ||
46 | |||
47 | // O(n) pass to convert to a tree, so that we can efficiently | ||
48 | // refer to substrings | ||
49 | $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context); | ||
50 | |||
51 | // get a copy of the HTML definition | ||
52 | $definition = $config->getHTMLDefinition(); | ||
53 | |||
54 | $excludes_enabled = !$config->get('Core.DisableExcludes'); | ||
55 | |||
56 | // setup the context variable 'IsInline', for chameleon processing | ||
57 | // is 'false' when we are not inline, 'true' when it must always | ||
58 | // be inline, and an integer when it is inline for a certain | ||
59 | // branch of the document tree | ||
60 | $is_inline = $definition->info_parent_def->descendants_are_inline; | ||
61 | $context->register('IsInline', $is_inline); | ||
62 | |||
63 | // setup error collector | ||
64 | $e =& $context->get('ErrorCollector', true); | ||
65 | |||
66 | //####################################################################// | ||
67 | // Loop initialization | ||
68 | |||
69 | // stack that contains all elements that are excluded | ||
70 | // it is organized by parent elements, similar to $stack, | ||
71 | // but it is only populated when an element with exclusions is | ||
72 | // processed, i.e. there won't be empty exclusions. NOTE(review): not read again in this method. | ||
73 | $exclude_stack = array($definition->info_parent_def->excludes); | ||
74 | |||
75 | // variable that contains the start token while we are processing | ||
76 | // nodes. This enables error reporting to do its job | ||
77 | $node = $top_node; | ||
78 | // dummy token | ||
79 | list($token, $d) = $node->toTokenPair(); | ||
80 | $context->register('CurrentNode', $node); | ||
81 | $context->register('CurrentToken', $token); | ||
82 | |||
83 | //####################################################################// | ||
84 | // Loop | ||
85 | |||
86 | // We need to implement a post-order traversal iteratively, to | ||
87 | // avoid running into stack space limits. This is pretty tricky | ||
88 | // to reason about, so we just manually stack-ify the recursive | ||
89 | // variant: | ||
90 | // | ||
91 | // function f($node) { | ||
92 | // foreach ($node->children as $child) { | ||
93 | // f($child); | ||
94 | // } | ||
95 | // validate($node); | ||
96 | // } | ||
97 | // | ||
98 | // Thus, we represent a stack frame as array($node, $is_inline, | ||
99 | // $excludes, $ix), where $ix is the index of the next child of | ||
100 | // $node still to be visited; a frame is re-pushed with its | ||
101 | // advanced $ix before descending into an element child. | ||
102 | |||
103 | $parent_def = $definition->info_parent_def; | ||
104 | $stack = array( | ||
105 | array($top_node, | ||
106 | $parent_def->descendants_are_inline, | ||
107 | $parent_def->excludes, // exclusions | ||
108 | 0) | ||
109 | ); | ||
110 | |||
111 | while (!empty($stack)) { | ||
112 | list($node, $is_inline, $excludes, $ix) = array_pop($stack); | ||
113 | // recursive call | ||
114 | $go = false; | ||
115 | $def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name]; | ||
116 | while (isset($node->children[$ix])) { | ||
117 | $child = $node->children[$ix++]; | ||
118 | if ($child instanceof HTMLPurifier_Node_Element) { | ||
119 | $go = true; | ||
120 | $stack[] = array($node, $is_inline, $excludes, $ix); | ||
121 | $stack[] = array($child, | ||
122 | // ToDo: I don't think it matters if it's def or | ||
123 | // child_def, but double check this... | ||
124 | $is_inline || $def->descendants_are_inline, | ||
125 | empty($def->excludes) ? $excludes | ||
126 | : array_merge($excludes, $def->excludes), | ||
127 | 0); | ||
128 | break; | ||
129 | } | ||
130 | }; | ||
131 | if ($go) continue; | ||
132 | list($token, $d) = $node->toTokenPair(); | ||
133 | // base case: no unvisited element children remain, so validate $node itself | ||
134 | if ($excludes_enabled && isset($excludes[$node->name])) { | ||
135 | $node->dead = true; | ||
136 | if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); | ||
137 | } else { | ||
138 | // XXX I suppose it would be slightly more efficient to | ||
139 | // avoid the allocation here and have children | ||
140 | // strategies handle it | ||
141 | $children = array(); | ||
142 | foreach ($node->children as $child) { | ||
143 | if (!$child->dead) $children[] = $child; | ||
144 | } | ||
145 | $result = $def->child->validateChildren($children, $config, $context); | ||
146 | if ($result === true) { | ||
147 | // children already valid: keep the list with dead nodes filtered out | ||
148 | $node->children = $children; | ||
149 | } elseif ($result === false) { | ||
150 | $node->dead = true; | ||
151 | if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); | ||
152 | } else { | ||
153 | $node->children = $result; | ||
154 | if ($e) { | ||
155 | // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators | ||
156 | if (empty($result) && !empty($children)) { | ||
157 | $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); | ||
158 | } else if ($result != $children) { | ||
159 | $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); | ||
160 | } | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | //####################################################################// | ||
167 | // Post-processing | ||
168 | |||
169 | // remove context variables | ||
170 | $context->destroy('IsInline'); | ||
171 | $context->destroy('CurrentNode'); | ||
172 | $context->destroy('CurrentToken'); | ||
173 | |||
174 | //####################################################################// | ||
175 | // Return | ||
176 | |||
177 | return HTMLPurifier_Arborize::flatten($node, $config, $context); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php new file mode 100644 index 00000000..2c792feb --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php | |||
@@ -0,0 +1,600 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Takes tokens and makes them well-formed (balance end tags, etc.) | ||
5 | * | ||
6 | * Specification of the armor attributes this strategy uses: | ||
7 | * | ||
8 | * - MakeWellFormed_TagClosedError: This armor field is used to | ||
9 | * suppress tag closed errors for certain tokens [TagClosedSuppress], | ||
10 | * in particular, if a tag was generated automatically by HTML | ||
11 | * Purifier, we may rely on our infrastructure to close it for us | ||
12 | * and shouldn't report an error to the user [TagClosedAuto]. | ||
13 | */ | ||
14 | class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy | ||
15 | { | ||
16 | |||
17 | /** | ||
18 | * Array stream of tokens being processed. | ||
19 | * @type HTMLPurifier_Token[] | ||
20 | */ | ||
21 | protected $tokens; | ||
22 | |||
23 | /** | ||
24 | * Current token. | ||
25 | * @type HTMLPurifier_Token | ||
26 | */ | ||
27 | protected $token; | ||
28 | |||
29 | /** | ||
30 | * Zipper managing the true state. | ||
31 | * @type HTMLPurifier_Zipper | ||
32 | */ | ||
33 | protected $zipper; | ||
34 | |||
35 | /** | ||
36 | * Current nesting of elements. | ||
37 | * @type array | ||
38 | */ | ||
39 | protected $stack; | ||
40 | |||
41 | /** | ||
42 | * Injectors active in this stream processing. | ||
43 | * @type HTMLPurifier_Injector[] | ||
44 | */ | ||
45 | protected $injectors; | ||
46 | |||
47 | /** | ||
48 | * Current instance of HTMLPurifier_Config. | ||
49 | * @type HTMLPurifier_Config | ||
50 | */ | ||
51 | protected $config; | ||
52 | |||
53 | /** | ||
54 | * Current instance of HTMLPurifier_Context. | ||
55 | * @type HTMLPurifier_Context | ||
56 | */ | ||
57 | protected $context; | ||
58 | |||
59 | /** | ||
60 | * @param HTMLPurifier_Token[] $tokens Token stream to balance | ||
61 | * @param HTMLPurifier_Config $config Supplies the HTML definition, Core.EscapeInvalidTags and the AutoFormat batch | ||
62 | * @param HTMLPurifier_Context $context Receives CurrentNesting, InputZipper and CurrentToken while this runs | ||
63 | * @return HTMLPurifier_Token[] Token array rebuilt from the zipper (empty array when there is no first token) | ||
64 | * @throws HTMLPurifier_Exception When a tag token cannot be classified, or processToken() rejects a substitution | ||
65 | */ | ||
66 | public function execute($tokens, $config, $context) | ||
67 | { | ||
68 | $definition = $config->getHTMLDefinition(); | ||
69 | |||
70 | // local variables | ||
71 | $generator = new HTMLPurifier_Generator($config, $context); | ||
72 | $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); | ||
73 | // used for autoclose early abortion | ||
74 | $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config); | ||
75 | $e = $context->get('ErrorCollector', true); | ||
76 | $i = false; // injector index | ||
77 | list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens); | ||
78 | if ($token === NULL) { | ||
79 | return array(); | ||
80 | } | ||
81 | $reprocess = false; // whether or not to reprocess the same token | ||
82 | $stack = array(); | ||
83 | |||
84 | // member variables | ||
85 | $this->stack =& $stack; | ||
86 | $this->tokens =& $tokens; | ||
87 | $this->token =& $token; | ||
88 | $this->zipper =& $zipper; | ||
89 | $this->config = $config; | ||
90 | $this->context = $context; | ||
91 | |||
92 | // context variables | ||
93 | $context->register('CurrentNesting', $stack); | ||
94 | $context->register('InputZipper', $zipper); | ||
95 | $context->register('CurrentToken', $token); | ||
96 | |||
97 | // -- begin INJECTOR -- | ||
98 | |||
99 | $this->injectors = array(); | ||
100 | |||
101 | $injectors = $config->getBatch('AutoFormat'); | ||
102 | $def_injectors = $definition->info_injector; | ||
103 | $custom_injectors = $injectors['Custom']; | ||
104 | unset($injectors['Custom']); // special case | ||
105 | foreach ($injectors as $injector => $b) { | ||
106 | // XXX: Fix with a legitimate lookup table of enabled filters | ||
107 | if (strpos($injector, '.') !== false) { | ||
108 | continue; | ||
109 | } | ||
110 | $injector = "HTMLPurifier_Injector_$injector"; | ||
111 | if (!$b) { | ||
112 | continue; | ||
113 | } | ||
114 | $this->injectors[] = new $injector; | ||
115 | } | ||
116 | foreach ($def_injectors as $injector) { | ||
117 | // assumed to be objects | ||
118 | $this->injectors[] = $injector; | ||
119 | } | ||
120 | foreach ($custom_injectors as $injector) { | ||
121 | if (!$injector) { | ||
122 | continue; | ||
123 | } | ||
124 | if (is_string($injector)) { | ||
125 | $injector = "HTMLPurifier_Injector_$injector"; | ||
126 | $injector = new $injector; | ||
127 | } | ||
128 | $this->injectors[] = $injector; | ||
129 | } | ||
130 | |||
131 | // give the injectors references to the definition and context | ||
132 | // variables for performance reasons | ||
133 | foreach ($this->injectors as $ix => $injector) { | ||
134 | $error = $injector->prepare($config, $context); | ||
135 | if (!$error) { | ||
136 | continue; | ||
137 | } | ||
138 | unset($this->injectors[$ix]); // rm the injector by key; splicing here would shift later entries away from $ix | ||
139 | trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING); | ||
140 | } | ||
141 | $this->injectors = array_values($this->injectors); // re-pack keys to 0..n-1 for the $i lookups below | ||
142 | // -- end INJECTOR -- | ||
143 | |||
144 | // a note on reprocessing: | ||
145 | // In order to reduce code duplication, whenever some code needs | ||
146 | // to make HTML changes in order to make things "correct", the | ||
147 | // new HTML gets sent through the purifier, regardless of its | ||
148 | // status. This means that if we add a start token, because it | ||
149 | // was totally necessary, we don't have to update nesting; we just | ||
150 | // punt ($reprocess = true; continue;) and it does that for us. | ||
151 | |||
152 | // no loop condition: the loop ends at the break taken at document end with an empty stack | ||
153 | for (;; | ||
154 | // only increment if we don't need to reprocess | ||
155 | $reprocess ? $reprocess = false : $token = $zipper->next($token)) { | ||
156 | |||
157 | // check for a rewind | ||
158 | if (is_int($i)) { | ||
159 | // possibility: disable rewinding if the current token has a | ||
160 | // rewind set on it already. This would offer protection from | ||
161 | // infinite loop, but might hinder some advanced rewinding. | ||
162 | $rewind_offset = $this->injectors[$i]->getRewindOffset(); | ||
163 | if (is_int($rewind_offset)) { | ||
164 | for ($j = 0; $j < $rewind_offset; $j++) { | ||
165 | if (empty($zipper->front)) break; | ||
166 | $token = $zipper->prev($token); | ||
167 | // indicate that other injectors should not process this token, | ||
168 | // but we need to reprocess it | ||
169 | unset($token->skip[$i]); | ||
170 | $token->rewind = $i; | ||
171 | if ($token instanceof HTMLPurifier_Token_Start) { | ||
172 | array_pop($this->stack); | ||
173 | } elseif ($token instanceof HTMLPurifier_Token_End) { | ||
174 | $this->stack[] = $token->start; | ||
175 | } | ||
176 | } | ||
177 | } | ||
178 | $i = false; | ||
179 | } | ||
180 | |||
181 | // handle case of document end | ||
182 | if ($token === NULL) { | ||
183 | // kill processing if stack is empty | ||
184 | if (empty($this->stack)) { | ||
185 | break; | ||
186 | } | ||
187 | |||
188 | // peek | ||
189 | $top_nesting = array_pop($this->stack); | ||
190 | $this->stack[] = $top_nesting; | ||
191 | |||
192 | // send error [TagClosedSuppress] | ||
193 | if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { | ||
194 | $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); | ||
195 | } | ||
196 | |||
197 | // append, don't splice, since this is the end | ||
198 | $token = new HTMLPurifier_Token_End($top_nesting->name); | ||
199 | |||
200 | // punt! | ||
201 | $reprocess = true; | ||
202 | continue; | ||
203 | } | ||
204 | |||
205 | //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack); | ||
206 | //flush(); | ||
207 | |||
208 | // quick-check: if it's not a tag, no need to process | ||
209 | if (empty($token->is_tag)) { | ||
210 | if ($token instanceof HTMLPurifier_Token_Text) { | ||
211 | foreach ($this->injectors as $i => $injector) { | ||
212 | if (isset($token->skip[$i])) { | ||
213 | continue; | ||
214 | } | ||
215 | if ($token->rewind !== null && $token->rewind !== $i) { | ||
216 | continue; | ||
217 | } | ||
218 | // XXX: hand the injector $r so that whatever it leaves there goes to processToken() | ||
219 | $r = $token; | ||
220 | $injector->handleText($r); | ||
221 | $token = $this->processToken($r, $i); | ||
222 | $reprocess = true; | ||
223 | break; | ||
224 | } | ||
225 | } | ||
226 | // another possibility is a comment | ||
227 | continue; | ||
228 | } | ||
229 | |||
230 | if (isset($definition->info[$token->name])) { | ||
231 | $type = $definition->info[$token->name]->child->type; | ||
232 | } else { | ||
233 | $type = false; // Type is unknown, treat accordingly | ||
234 | } | ||
235 | |||
236 | // quick tag checks: anything that's *not* an end tag | ||
237 | $ok = false; | ||
238 | if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { | ||
239 | // claims to be a start tag but is empty | ||
240 | $token = new HTMLPurifier_Token_Empty( | ||
241 | $token->name, | ||
242 | $token->attr, | ||
243 | $token->line, | ||
244 | $token->col, | ||
245 | $token->armor | ||
246 | ); | ||
247 | $ok = true; | ||
248 | } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { | ||
249 | // claims to be empty but really is a start tag | ||
250 | // NB: this assignment is required | ||
251 | $old_token = $token; | ||
252 | $token = new HTMLPurifier_Token_End($token->name); | ||
253 | $token = $this->insertBefore( | ||
254 | new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor) | ||
255 | ); | ||
256 | // punt (since we had to modify the input stream in a non-trivial way) | ||
257 | $reprocess = true; | ||
258 | continue; | ||
259 | } elseif ($token instanceof HTMLPurifier_Token_Empty) { | ||
260 | // real empty token | ||
261 | $ok = true; | ||
262 | } elseif ($token instanceof HTMLPurifier_Token_Start) { | ||
263 | // start tag | ||
264 | |||
265 | // ...unless they also have to close their parent | ||
266 | if (!empty($this->stack)) { | ||
267 | |||
268 | // Performance note: you might think that it's rather | ||
269 | // inefficient, recalculating the autoclose information | ||
270 | // for every tag that a token closes (since when we | ||
271 | // do an autoclose, we push a new token into the | ||
272 | // stream and then /process/ that, before | ||
273 | // re-processing this token.) But this is | ||
274 | // necessary, because an injector can make an | ||
275 | // arbitrary transformations to the autoclosing | ||
276 | // tokens we introduce, so things may have changed | ||
277 | // in the meantime. Also, doing the inefficient thing is | ||
278 | // "easy" to reason about (for certain perverse definitions | ||
279 | // of "easy") | ||
280 | |||
281 | $parent = array_pop($this->stack); | ||
282 | $this->stack[] = $parent; | ||
283 | |||
284 | $parent_def = null; | ||
285 | $parent_elements = null; | ||
286 | $autoclose = false; | ||
287 | if (isset($definition->info[$parent->name])) { | ||
288 | $parent_def = $definition->info[$parent->name]; | ||
289 | $parent_elements = $parent_def->child->getAllowedElements($config); | ||
290 | $autoclose = !isset($parent_elements[$token->name]); | ||
291 | } | ||
292 | |||
293 | if ($autoclose && $definition->info[$token->name]->wrap) { | ||
294 | // Check if an element can be wrapped by another | ||
295 | // element to make it valid in a context (for | ||
296 | // example, <ul><ul> needs a <li> in between) | ||
297 | $wrapname = $definition->info[$token->name]->wrap; | ||
298 | $wrapdef = $definition->info[$wrapname]; | ||
299 | $elements = $wrapdef->child->getAllowedElements($config); | ||
300 | if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) { | ||
301 | $newtoken = new HTMLPurifier_Token_Start($wrapname); | ||
302 | $token = $this->insertBefore($newtoken); | ||
303 | $reprocess = true; | ||
304 | continue; | ||
305 | } | ||
306 | } | ||
307 | |||
308 | $carryover = false; | ||
309 | if ($autoclose && $parent_def->formatting) { | ||
310 | $carryover = true; | ||
311 | } | ||
312 | |||
313 | if ($autoclose) { | ||
314 | // check if this autoclose is doomed to fail | ||
315 | // (this rechecks $parent, which is harmless) | ||
316 | $autoclose_ok = isset($global_parent_allowed_elements[$token->name]); | ||
317 | if (!$autoclose_ok) { | ||
318 | foreach ($this->stack as $ancestor) { | ||
319 | $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config); | ||
320 | if (isset($elements[$token->name])) { | ||
321 | $autoclose_ok = true; | ||
322 | break; | ||
323 | } | ||
324 | if ($definition->info[$token->name]->wrap) { | ||
325 | $wrapname = $definition->info[$token->name]->wrap; | ||
326 | $wrapdef = $definition->info[$wrapname]; | ||
327 | $wrap_elements = $wrapdef->child->getAllowedElements($config); | ||
328 | if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) { | ||
329 | $autoclose_ok = true; | ||
330 | break; | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | } | ||
335 | if ($autoclose_ok) { | ||
336 | // errors need to be updated | ||
337 | $new_token = new HTMLPurifier_Token_End($parent->name); | ||
338 | $new_token->start = $parent; | ||
339 | // [TagClosedSuppress] | ||
340 | if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { | ||
341 | if (!$carryover) { | ||
342 | $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); | ||
343 | } else { | ||
344 | $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); | ||
345 | } | ||
346 | } | ||
347 | if ($carryover) { | ||
348 | $element = clone $parent; | ||
349 | // [TagClosedAuto] | ||
350 | $element->armor['MakeWellFormed_TagClosedError'] = true; | ||
351 | $element->carryover = true; | ||
352 | $token = $this->processToken(array($new_token, $token, $element)); | ||
353 | } else { | ||
354 | $token = $this->insertBefore($new_token); | ||
355 | } | ||
356 | } else { | ||
357 | $token = $this->remove(); | ||
358 | } | ||
359 | $reprocess = true; | ||
360 | continue; | ||
361 | } | ||
362 | |||
363 | } | ||
364 | $ok = true; | ||
365 | } | ||
366 | |||
367 | if ($ok) { | ||
368 | foreach ($this->injectors as $i => $injector) { | ||
369 | if (isset($token->skip[$i])) { | ||
370 | continue; | ||
371 | } | ||
372 | if ($token->rewind !== null && $token->rewind !== $i) { | ||
373 | continue; | ||
374 | } | ||
375 | $r = $token; | ||
376 | $injector->handleElement($r); | ||
377 | $token = $this->processToken($r, $i); | ||
378 | $reprocess = true; | ||
379 | break; | ||
380 | } | ||
381 | if (!$reprocess) { | ||
382 | // ah, nothing interesting happened; do normal processing | ||
383 | if ($token instanceof HTMLPurifier_Token_Start) { | ||
384 | $this->stack[] = $token; | ||
385 | } elseif ($token instanceof HTMLPurifier_Token_End) { | ||
386 | throw new HTMLPurifier_Exception( | ||
387 | 'Improper handling of end tag in start code; possible error in MakeWellFormed' | ||
388 | ); | ||
389 | } | ||
390 | } | ||
391 | continue; | ||
392 | } | ||
393 | |||
394 | // sanity check: we should be dealing with a closing tag | ||
395 | if (!$token instanceof HTMLPurifier_Token_End) { | ||
396 | throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier'); | ||
397 | } | ||
398 | |||
399 | // make sure that we have something open | ||
400 | if (empty($this->stack)) { | ||
401 | if ($escape_invalid_tags) { | ||
402 | if ($e) { | ||
403 | $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); | ||
404 | } | ||
405 | $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token)); | ||
406 | } else { | ||
407 | if ($e) { | ||
408 | $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); | ||
409 | } | ||
410 | $token = $this->remove(); | ||
411 | } | ||
412 | $reprocess = true; | ||
413 | continue; | ||
414 | } | ||
415 | |||
416 | // first, check for the simplest case: everything closes neatly. | ||
417 | // Eventually, everything passes through here; if there are problems | ||
418 | // we modify the input stream accordingly and then punt, so that | ||
419 | // the tokens get processed again. | ||
420 | $current_parent = array_pop($this->stack); | ||
421 | if ($current_parent->name == $token->name) { | ||
422 | $token->start = $current_parent; | ||
423 | foreach ($this->injectors as $i => $injector) { | ||
424 | if (isset($token->skip[$i])) { | ||
425 | continue; | ||
426 | } | ||
427 | if ($token->rewind !== null && $token->rewind !== $i) { | ||
428 | continue; | ||
429 | } | ||
430 | $r = $token; | ||
431 | $injector->handleEnd($r); | ||
432 | $token = $this->processToken($r, $i); | ||
433 | $this->stack[] = $current_parent; | ||
434 | $reprocess = true; | ||
435 | break; | ||
436 | } | ||
437 | continue; | ||
438 | } | ||
439 | |||
440 | // okay, so we're trying to close the wrong tag | ||
441 | |||
442 | // undo the previous pop | ||
443 | $this->stack[] = $current_parent; | ||
444 | |||
445 | // scroll back the entire nest, trying to find our tag. | ||
446 | // (feature could be to specify how far you'd like to go) | ||
447 | $size = count($this->stack); | ||
448 | // -2 because -1 is the last element, but we already checked that | ||
449 | $skipped_tags = false; | ||
450 | for ($j = $size - 2; $j >= 0; $j--) { | ||
451 | if ($this->stack[$j]->name == $token->name) { | ||
452 | $skipped_tags = array_slice($this->stack, $j); | ||
453 | break; | ||
454 | } | ||
455 | } | ||
456 | |||
457 | // we didn't find the tag, so remove | ||
458 | if ($skipped_tags === false) { | ||
459 | if ($escape_invalid_tags) { | ||
460 | if ($e) { | ||
461 | $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); | ||
462 | } | ||
463 | $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token)); | ||
464 | } else { | ||
465 | if ($e) { | ||
466 | $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); | ||
467 | } | ||
468 | $token = $this->remove(); | ||
469 | } | ||
470 | $reprocess = true; | ||
471 | continue; | ||
472 | } | ||
473 | |||
474 | // do errors, in REVERSE $j order: a,b,c with </a></b></c> | ||
475 | $c = count($skipped_tags); | ||
476 | if ($e) { | ||
477 | for ($j = $c - 1; $j > 0; $j--) { | ||
478 | // notice we exclude $j == 0, i.e. the current ending tag, from | ||
479 | // the errors... [TagClosedSuppress] | ||
480 | if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { | ||
481 | $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); | ||
482 | } | ||
483 | } | ||
484 | } | ||
485 | |||
486 | // insert tags, in FORWARD $j order: c,b,a with </a></b></c> | ||
487 | $replace = array($token); | ||
488 | for ($j = 1; $j < $c; $j++) { | ||
489 | // ...as well as from the insertions | ||
490 | $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); | ||
491 | $new_token->start = $skipped_tags[$j]; | ||
492 | array_unshift($replace, $new_token); | ||
493 | if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { | ||
494 | // [TagClosedAuto] | ||
495 | $element = clone $skipped_tags[$j]; | ||
496 | $element->carryover = true; | ||
497 | $element->armor['MakeWellFormed_TagClosedError'] = true; | ||
498 | $replace[] = $element; | ||
499 | } | ||
500 | } | ||
501 | $token = $this->processToken($replace); | ||
502 | $reprocess = true; | ||
503 | continue; | ||
504 | } | ||
505 | |||
506 | $context->destroy('CurrentToken'); | ||
507 | $context->destroy('CurrentNesting'); | ||
508 | $context->destroy('InputZipper'); | ||
509 | |||
510 | unset($this->injectors, $this->stack, $this->tokens); | ||
511 | return $zipper->toArray($token); | ||
512 | } | ||
513 | |||
514 | /** | ||
515 | * Processes arbitrary token values for complicated substitution patterns. | ||
516 | * In general: | ||
517 | * | ||
518 | * If $token is an array, it is a list of tokens to substitute for the | ||
519 | * current token. These tokens then get individually processed. If there | ||
520 | * is a leading integer in the list, that integer determines how many | ||
521 | * tokens from the stream should be removed. | ||
522 | * | ||
523 | * If $token is a regular token, it is swapped with the current token. | ||
524 | * | ||
525 | * If $token is false, the current token is deleted. | ||
526 | * | ||
527 | * If $token is an integer, that number of tokens (with the first token | ||
528 | * being the current one) will be deleted. | ||
529 | * | ||
530 | * @param HTMLPurifier_Token|array|int|bool $token Token substitution value | ||
531 | * @param int $injector Index of the injector that performed the substitution; -1 (default) if not injector related | ||
532 | * @return mixed Second element of the zipper splice result, used by callers as the new current token | ||
533 | * @throws HTMLPurifier_Exception If the substitution value has an invalid type or asks to delete zero tokens | ||
534 | */ | ||
535 | protected function processToken($token, $injector = -1) | ||
536 | { | ||
537 | // normalize forms of token | ||
538 | if (is_object($token)) { | ||
539 | $token = array(1, $token); | ||
540 | } | ||
541 | if (is_int($token)) { | ||
542 | $token = array($token); | ||
543 | } | ||
544 | if ($token === false) { | ||
545 | $token = array(1); | ||
546 | } | ||
547 | if (!is_array($token)) { | ||
548 | throw new HTMLPurifier_Exception('Invalid token type from injector'); | ||
549 | } | ||
550 | if (!is_int($token[0])) { | ||
551 | array_unshift($token, 1); | ||
552 | } | ||
553 | if ($token[0] === 0) { | ||
554 | throw new HTMLPurifier_Exception('Deleting zero tokens is not valid'); | ||
555 | } | ||
556 | |||
557 | // $token is now an array with the following form: | ||
558 | // array(number nodes to delete, new node 1, new node 2, ...) | ||
559 | |||
560 | $delete = array_shift($token); | ||
561 | list($old, $r) = $this->zipper->splice($this->token, $delete, $token); | ||
562 | |||
563 | if ($injector > -1) { | ||
564 | // determine appropriate skips | ||
565 | $oldskip = isset($old[0]) ? $old[0]->skip : array(); | ||
566 | foreach ($token as $object) { | ||
567 | $object->skip = $oldskip; | ||
568 | $object->skip[$injector] = true; | ||
569 | } | ||
570 | } | ||
571 | |||
572 | return $r; | ||
573 | |||
574 | } | ||
575 | |||
576 | /** | ||
577 | * Inserts a token before the current token; the cursor then points to it. You must reprocess after this. | ||
578 | * @param HTMLPurifier_Token $token Token to insert | ||
579 | * @return mixed Second element of the zipper splice result, used by callers as the new current token | ||
580 | */ | ||
581 | private function insertBefore($token) | ||
582 | { | ||
583 | // NB not $this->zipper->insertBefore(), due to positioning | ||
584 | // differences | ||
585 | $splice = $this->zipper->splice($this->token, 0, array($token)); | ||
586 | |||
587 | return $splice[1]; | ||
588 | } | ||
589 | |||
590 | /** | ||
591 | * Removes current token. Cursor now points to new token occupying previously occupied space. You must reprocess after this. | ||
592 | * @return mixed Result of the zipper delete(), used by callers as the new current token | ||
593 | */ | ||
594 | private function remove() | ||
595 | { | ||
596 | return $this->zipper->delete(); | ||
597 | } | ||
598 | } | ||
599 | |||
600 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php new file mode 100644 index 00000000..d1adf59f --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php | |||
@@ -0,0 +1,207 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * Walks the token list once. Tags are first run through any TagTransform
 * registered for their name; tags that are still unknown to the HTML
 * definition afterwards are either escaped into text or dropped,
 * depending on configuration. Comments are kept, converted to text, or
 * stripped, and any token that is neither a tag, a comment nor text is
 * discarded.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens the HTML definition accepts.
     *
     * While running, the token being examined is exposed through the
     * context under the name 'CurrentToken'; the registration is removed
     * again before returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[] The surviving (possibly replaced) tokens, in order.
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_invalid = $config->get('Core.EscapeInvalidTags');
        $drop_invalid_img = $config->get('Core.RemoveInvalidImg');

        // these three settings only influence whether comments survive
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_re = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_re !== null;

        $script_contents_flag = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents compatibility: an explicit true/false
        // overrides whatever Core.HiddenElements says about 'script';
        // any other value leaves the list untouched
        if ($script_contents_flag === true) {
            $hidden['script'] = true;
        } elseif ($script_contents_flag === false) {
            unset($hidden['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // while set to a tag name, tokens are discarded until a tag with
        // that name shows up again
        $skip_until = false;

        // while set to a tag name, comments are turned into text tokens
        $textify_inside = false;

        // $token is bound into the context by reference, so every
        // assignment to it below is visible through 'CurrentToken'
        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            $is_tag = !empty($token->is_tag);

            // inside a removed element: swallow everything except a tag
            // carrying the name we are waiting for
            if ($skip_until && (!$is_tag || $token->name !== $skip_until)) {
                continue;
            }

            if ($is_tag) {
                // give a registered tag transform the first shot at the token
                if (isset($html_def->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    $transform = $html_def->info_tag_transform[$original_name];
                    $token = $transform->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($html_def->info[$token->name])) {
                    // known element; start/empty tags of elements with
                    // required attributes get validated right here
                    // ('img' only when Core.RemoveInvalidImg is on)
                    $has_attributes = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    if ($has_attributes &&
                        $html_def->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $drop_invalid_img)
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $attr_missing = false;
                        foreach ($html_def->info[$token->name]->required_attr as $required) {
                            if (!isset($token->attr[$required])) {
                                $attr_missing = true;
                                break;
                            }
                        }
                        if ($attr_missing) {
                            // $required still names the first attribute found missing
                            if ($e) {
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $required
                                );
                            }
                            continue;
                        }
                        // attributes are validated already; the armor flag
                        // lets a later attribute-validation pass skip this token
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // entering / leaving a hidden element that is allowed
                    if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_inside = $token->name;
                    } elseif ($token->name === $textify_inside && $token instanceof HTMLPurifier_Token_End) {
                        $textify_inside = false;
                    }

                } elseif ($escape_invalid) {
                    // unknown tag: keep it, but as the text of its own markup
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // unknown tag: drop it, and for hidden elements drop
                    // the contents as well
                    $is_hidden = isset($hidden[$token->name]);
                    if ($is_hidden) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $skip_until = $token->name;
                        } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                            // neither start nor empty: stop skipping
                            $skip_until = false;
                        }
                        // an empty tag leaves the skip state as it is
                    }
                    if ($e) {
                        $e->send(
                            E_ERROR,
                            $is_hidden
                                ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                : 'Strategy_RemoveForeignElements: Foreign element removed'
                        );
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_inside !== false) {
                    // comment inside an allowed hidden element becomes text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } else {
                    if (!$trusted && !$filter_comments) {
                        // comments are not permitted at all: strip
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // the trailing-hyphen check only feeds error reporting,
                    // so it is skipped when there is no collector
                    $trailing_hyphen = $e && substr($token->data, -1) == '-';

                    // normalise: no trailing hyphens, no runs of hyphens
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = strpos($token->data, '--') !== false;
                    if ($found_double_hyphen) {
                        do {
                            $token->data = str_replace('--', '-', $token->data);
                        } while (strpos($token->data, '--') !== false);
                    }

                    $comment_ok = $trusted
                        || !empty($allowed_comments[trim($token->data)])
                        || ($allowed_comments_re !== null
                            && preg_match($allowed_comments_re, trim($token->data)));

                    if (!$comment_ok) {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // comment is kept; report what the cleanup changed
                    if ($e) {
                        if ($trailing_hyphen) {
                            $e->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($found_double_hyphen) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // not a tag, comment or text token: discard
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $e) {
            // the awaited tag never arrived; everything up to the end was dropped
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }
        $context->destroy('CurrentToken');
        return $kept;
    }
}
206 | |||
207 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php new file mode 100644 index 00000000..428f975f --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php | |||
@@ -0,0 +1,45 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Validate all attributes in the tokens.
 *
 * Only start and empty tags are looked at, and tokens already flagged
 * with the 'ValidateAttributes' armor are left alone.
 */
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{

    /**
     * Runs the attribute validator over every eligible token.
     *
     * While running, the token being examined is exposed through the
     * context under the name 'CurrentToken'; the registration is removed
     * again before returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] The same list that was passed in; no tokens are added or removed.
     */
    public function execute($tokens, $config, $context)
    {
        $attr_validator = new HTMLPurifier_AttrValidator();

        // bound into the context by reference, so the loop variable is
        // what 'CurrentToken' resolves to during validation
        $current = false;
        $context->register('CurrentToken', $current);

        foreach ($tokens as $current) {
            // attributes only exist on start and empty tags
            $has_attributes = $current instanceof HTMLPurifier_Token_Start
                || $current instanceof HTMLPurifier_Token_Empty;

            // armored tokens are skipped
            if ($has_attributes && empty($current->armor['ValidateAttributes'])) {
                // note that we have no facilities here for removing tokens
                $attr_validator->validateToken($current, $config, $context);
            }
        }

        $context->destroy('CurrentToken');
        return $tokens;
    }
}
44 | |||
45 | // vim: et sw=4 sts=4 | ||