diff options
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php new file mode 100644 index 00000000..8b1eb20f --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php | |||
@@ -0,0 +1,181 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Takes a well formed list of tokens and fixes their nesting. | ||
5 | * | ||
6 | * HTML elements dictate which elements are allowed to be their children, | ||
7 | * for example, you can't have a p tag in a span tag. Other elements have | ||
8 | * much more rigorous definitions: tables, for instance, require a specific | ||
9 | * order for their elements. There are also constraints not expressible by | ||
10 | * document type definitions, such as the chameleon nature of ins/del | ||
11 | * tags and global child exclusions. | ||
12 | * | ||
13 | * The first major objective of this strategy is to iterate through all | ||
14 | * the nodes and determine whether or not their children conform to the | ||
15 | * element's definition. If they do not, the child definition may | ||
16 | * optionally supply an amended list of elements that is valid or | ||
17 | * require that the entire node be deleted (and the previous node | ||
18 | * rescanned). | ||
19 | * | ||
20 | * The second objective is to ensure that explicitly excluded elements of | ||
21 | * an element do not appear in its children. Code that accomplishes this | ||
22 | * task is pervasive through the strategy, though the two are distinct tasks | ||
23 | * and could, theoretically, be separated (although it's not recommended). | ||
24 | * | ||
25 | * @note Whether or not unrecognized children are silently dropped or | ||
26 | * translated into text depends on the child definitions. | ||
27 | * | ||
28 | * @todo Enable nodes to be bubbled out of the structure. This is | ||
29 | * easier with our new algorithm. | ||
30 | */ | ||
31 | |||
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{

    /**
     * Validates the children of every element against that element's child
     * definition and kills elements that an ancestor explicitly excludes.
     *
     * The token list is first converted into a node tree, the tree is then
     * walked in post-order using an explicit stack (no PHP recursion), and
     * finally the tree is flattened back into a token list.
     *
     * While the walk is running the context variables 'IsInline',
     * 'CurrentNode' and 'CurrentToken' are registered; all three are
     * destroyed again before the method returns.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {

        //####################################################################//
        // Pre-processing

        // O(n) pass to convert to a tree, so that we can efficiently
        // refer to the children of any element
        $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);

        // get a copy of the HTML definition
        $definition = $config->getHTMLDefinition();
        $parent_def = $definition->info_parent_def;

        $excludes_enabled = !$config->get('Core.DisableExcludes');

        // setup the context variable 'IsInline', for chameleon processing
        // is 'false' when we are not inline, 'true' when it must always
        // be inline, and an integer when it is inline for a certain
        // branch of the document tree
        //
        // NOTE(review): $is_inline, $node and $token are handed to
        // $context->register() and are then reassigned inside the loop
        // below. This presumably relies on register() binding its argument
        // by reference, so these three variables must not be renamed or
        // replaced by temporaries - confirm against HTMLPurifier_Context.
        $is_inline = $parent_def->descendants_are_inline;
        $context->register('IsInline', $is_inline);

        // setup error collector (null/false when error collection is off)
        $e =& $context->get('ErrorCollector', true);

        //####################################################################//
        // Loop initialization

        // variable that contains the node currently being processed, and
        // its start token. This enables error reporting to do its job
        $node = $top_node;
        // dummy token
        list($token) = $node->toTokenPair();
        $context->register('CurrentNode', $node);
        $context->register('CurrentToken', $token);

        //####################################################################//
        // Loop

        // We need to implement a post-order traversal iteratively, to
        // avoid running into stack space limits.  This is pretty tricky
        // to reason about, so we just manually stack-ify the recursive
        // variant:
        //
        //  function f($node) {
        //      foreach ($node->children as $child) {
        //          f($child);
        //      }
        //      validate($node);
        //  }
        //
        // A stack frame is array($node, $is_inline, $excludes, $ix):
        //  - $node      the element this frame belongs to
        //  - $is_inline inline state that applies to $node
        //  - $excludes  element names excluded by $node's ancestors
        //  - $ix        index of the next child of $node to look at;
        //               children before $ix have already been processed
        $stack = array(
            array(
                $top_node,
                $parent_def->descendants_are_inline,
                $parent_def->excludes, // exclusions
                0
            )
        );

        while (!empty($stack)) {
            list($node, $is_inline, $excludes, $ix) = array_pop($stack);

            // the frame at the bottom of the stack is the root node, which
            // is described by the parent definition rather than by name
            $def = empty($stack) ? $parent_def : $definition->info[$node->name];

            // recursive call: find the next element child and descend
            $go = false;
            while (isset($node->children[$ix])) {
                $child = $node->children[$ix++];
                if ($child instanceof HTMLPurifier_Node_Element) {
                    $go = true;
                    // put ourselves back so we resume at $ix once the
                    // child's subtree has been handled
                    $stack[] = array($node, $is_inline, $excludes, $ix);
                    $stack[] = array(
                        $child,
                        // ToDo: I don't think it matters if it's def or
                        // child_def, but double check this...
                        $is_inline || $def->descendants_are_inline,
                        empty($def->excludes) ? $excludes
                                              : array_merge($excludes, $def->excludes),
                        0
                    );
                    break;
                }
            }
            if ($go) {
                continue;
            }

            // all children are done; keep 'CurrentToken' in step with $node
            list($token) = $node->toTokenPair();

            // base case
            if ($excludes_enabled && isset($excludes[$node->name])) {
                // an ancestor forbids this element entirely
                $node->dead = true;
                if ($e) {
                    $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
                }
            } else {
                // XXX I suppose it would be slightly more efficient to
                // avoid the allocation here and have children
                // strategies handle it
                $children = array();
                foreach ($node->children as $child) {
                    if (!$child->dead) {
                        $children[] = $child;
                    }
                }
                $result = $def->child->validateChildren($children, $config, $context);
                if ($result === true) {
                    // children are valid as they are; store the list with
                    // the dead nodes filtered out
                    $node->children = $children;
                } elseif ($result === false) {
                    // children cannot be repaired: the node itself goes
                    $node->dead = true;
                    if ($e) {
                        $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
                    }
                } else {
                    // the child definition supplied an amended child list
                    $node->children = $result;
                    if ($e) {
                        // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
                        if (empty($result) && !empty($children)) {
                            $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
                        } elseif ($result != $children) {
                            $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
                        }
                    }
                }
            }
        }

        //####################################################################//
        // Post-processing

        // remove context variables
        $context->destroy('IsInline');
        $context->destroy('CurrentNode');
        $context->destroy('CurrentToken');

        //####################################################################//
        // Return

        // the last frame popped is always the root, so this is the same
        // node the loop finished on; naming it makes that explicit
        return HTMLPurifier_Arborize::flatten($top_node, $config, $context);
    }
}
180 | |||
181 | // vim: et sw=4 sts=4 | ||