]>
Commit | Line | Data |
---|---|---|
1 | <?php\r | |
2 | \r | |
3 | /**\r | |
4 | * Definition for tables. The general idea is to extract out all of the\r | |
5 | * essential bits, and then reconstruct it later.\r | |
6 | *\r | |
7 | * This is a bit confusing, because the DTDs and the W3C\r | |
8 | * validators seem to disagree on the appropriate definition. The\r | |
9 | * DTD claims:\r | |
10 | *\r | |
11 | * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)\r | |
12 | *\r | |
13 | * But actually, the HTML4 spec then has this to say:\r | |
14 | *\r | |
15 | * The TBODY start tag is always required except when the table\r | |
16 | * contains only one table body and no table head or foot sections.\r | |
17 | * The TBODY end tag may always be safely omitted.\r | |
18 | *\r | |
19 | * So the DTD is kind of wrong. The validator is, unfortunately, kind\r | |
20 | * of on crack.\r | |
21 | *\r | |
22 | * The definition changed again in XHTML1.1; and in my opinion, this\r | |
23 | * formulation makes the most sense.\r | |
24 | *\r | |
25 | * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))\r | |
26 | *\r | |
27 | * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.\r | |
28 | * If we encounter a thead, tfoot or tbody, we are placed in the former\r | |
29 | * mode, and we *must* wrap any stray tr segments with a tbody. But if\r | |
30 | * we don't run into any of them, just having tr tags is OK.\r | |
31 | */\r | |
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * Tables are declared as not allowed to be empty.
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Identifier of this child definition.
     * @type string
     */
    public $type = 'table';

    /**
     * Lookup of the element names this definition handles as direct
     * children of a table.
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children of a table into a canonical order:
     *
     *   leading whitespace, caption, col/colgroup, thead, tfoot, rows/bodies
     *
     * Whitespace and comment nodes stick to the *end* of the element that
     * precedes them, so they travel with that element when it is moved.
     *
     * Only the first caption is kept. Only the first thead and the first
     * tfoot keep their role; later ones are renamed to tbody (the node
     * itself is modified) and treated as ordinary content. Non-whitespace
     * text and elements with any other name are discarded.
     *
     * If a thead, tfoot or tbody was seen, every run of stray tr nodes is
     * wrapped in a newly created tbody.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Reordered children, or false when $children is
     *                    empty or contains no tr/tbody content at all.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // Always points at the bucket that trailing whitespace/comments
        // should be appended to (the bucket of the last element seen).
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever.  (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmutate the extra entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        // A table without any row content cannot be salvaged.
        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (possibly empty), so it can be merged
        // unconditionally.
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            foreach ($content as $node) {
                if ($node instanceof HTMLPurifier_Node_Comment) {
                    // The first pass stores comments next to whitespace
                    // without ever reading ->name, so do the same here
                    // instead of matching them by name: keep the comment
                    // attached to whatever precedes it.
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    continue;
                }
                switch ($node->name) {
                    case 'tbody':
                        // An explicit tbody ends the current synthetic one.
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        // Only whitespace text is ever stored in $content.
                        assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}
223 | \r | |
224 | // vim: et sw=4 sts=4\r |