diff options
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php new file mode 100644 index 00000000..0570c8b8 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php | |||
@@ -0,0 +1,224 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition for tables.  The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, just having tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * Element names this definition recognises as direct table children.
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children of a table into the order
     * caption, col/colgroup, thead, tfoot, body content, keeping
     * whitespace and comments attached to the element they followed.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Reordered list of child nodes, or false when
     *         there are no children or no tr/tbody content was found.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // $ws_accum always aliases the bucket that belongs to the most
        // recently seen element, so trailing whitespace/comments follow it.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever.  (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmutate the first and last entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (never false), so test for emptiness.
        if (!empty($cols)) {
            $ret = array_merge($ret, $cols);
        }
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            foreach ($content as $node) {
                // Comments land in $content via $ws_accum. The first pass
                // never reads ->name on them, so neither do we; they are
                // placed exactly like whitespace instead of being dropped,
                // which matches what tr mode already does.
                if ($node instanceof HTMLPurifier_Node_Comment) {
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    continue;
                }
                switch ($node->name) {
                    case 'tbody':
                        // an explicit tbody closes any synthetic one
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        // only whitespace text was admitted by the first pass
                        assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;

    }
}
223 | |||
224 | // vim: et sw=4 sts=4 | ||