[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / ChildDef / Table.php

<?php\r
\r
/**\r
 * Definition for tables.  The general idea is to extract out all of the\r
 * essential bits, and then reconstruct it later.\r
 *\r
 * This is a bit confusing, because the DTDs and the W3C\r
 * validators seem to disagree on the appropriate definition. The\r
 * DTD claims:\r
 *\r
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)\r
 *\r
 * But actually, the HTML4 spec then has this to say:\r
 *\r
 *      The TBODY start tag is always required except when the table\r
 *      contains only one table body and no table head or foot sections.\r
 *      The TBODY end tag may always be safely omitted.\r
 *\r
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind\r
 * of on crack.\r
 *\r
 * The definition changed again in XHTML1.1; and in my opinion, this\r
 * formulation makes the most sense.\r
 *\r
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))\r
 *\r
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.\r
 * If we encounter a thead, tfoot or tbody, we are placed in the former\r
 * mode, and we *must* wrap any stray tr segments with a tbody. But if\r
 * we don't run into any of them, having only tr tags is OK.
 */\r
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * Tables may not be empty: validateChildren() returns false for an
     * empty child list.
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Name of this child-definition type.
     * @type string
     */
    public $type = 'table';

    /**
     * Lookup of the element names validateChildren() knows how to place.
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children of a table into the canonical order
     *
     *      caption?, (col | colgroup)*, thead?, tfoot?, (tbody+ | tr+)
     *
     * keeping whitespace and comments attached to the element they
     * followed. Only the first caption is kept. Extra thead/tfoot nodes
     * are renamed to tbody. As soon as any thead/tfoot/tbody is seen,
     * stray runs of tr are wrapped in a freshly created tbody.
     *
     * @param array $children Child nodes of the table, in document order
     * @param HTMLPurifier_Config $config Unused here
     * @param HTMLPurifier_Context $context Unused here
     * @return array|bool Reordered list of nodes, or false when there are
     *                    no children or no tr/tbody content was found
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace (and comments) trailing each of the singletons above
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        // if true, then we need to wrap any stray <tr>s with a <tbody>.
        $tbody_mode = false;

        // $ws_accum always aliases the bucket belonging to the most
        // recently accepted element, so trailing whitespace/comments
        // travel with that element when things get reordered.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption! Later ones are
                    // dropped, and the whitespace bucket is left alone.
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever. (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! Current behavior
                        // is to turn every thead after the first into
                        // a tbody and put it into content. Attaching
                        // it to the existing thead was considered, but
                        // Firefox doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // same caveats as for thead
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky
                    // to the *end* of the previous element. So all of
                    // the bucket juggling above is to keep things
                    // together. Non-whitespace text is discarded.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }
        // Break the alias so nothing below can write into a bucket
        // by accident.
        unset($ws_accum);

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (possibly empty), so merge directly.
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            foreach ($content as $node) {
                // Comments were stashed in $content next to whitespace
                // during the first pass without consulting ->name, so
                // they must be routed here as well; otherwise the name
                // switch below would not match them and they would be
                // lost in tbody mode while surviving in tr mode.
                if ($node instanceof HTMLPurifier_Node_Comment) {
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    continue;
                }
                switch ($node->name) {
                    case 'tbody':
                        // an explicit tbody ends any synthesized one
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        // only whitespace text was let into $content
                        assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}
\r
// vim: et sw=4 sts=4\r
Commit	Line	Data
d4949327 NL	1	<?php\r
	2	\r
	3	/**\r
	4	* Definition for tables. The general idea is to extract out all of the\r
	5	* essential bits, and then reconstruct it later.\r
	6	*\r
	7	* This is a bit confusing, because the DTDs and the W3C\r
	8	* validators seem to disagree on the appropriate definition. The\r
	9	* DTD claims:\r
	10	*\r
	11	* (CAPTION?, (COL\|COLGROUP), THEAD?, TFOOT?, TBODY+)\r
	12	*\r
	13	* But actually, the HTML4 spec then has this to say:\r
	14	*\r
	15	* The TBODY start tag is always required except when the table\r
	16	* contains only one table body and no table head or foot sections.\r
	17	* The TBODY end tag may always be safely omitted.\r
	18	*\r
	19	* So the DTD is kind of wrong. The validator is, unfortunately, kind\r
	20	* of on crack.\r
	21	*\r
	22	* The definition changed again in XHTML1.1; and in my opinion, this\r
	23	* formulation makes the most sense.\r
	24	*\r
	25	* caption?, ( col* \| colgroup* ), (( thead?, tfoot?, tbody+ ) \| ( tr+ ))\r
	26	*\r
	27	* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.\r
	28	* If we encounter a thead, tfoot or tbody, we are placed in the former\r
	29	* mode, and we must wrap any stray tr segments with a tbody. But if\r
	30	* we don't run into any of them, just have tr tags is OK.\r
	31	*/\r
	32	class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef\r
	33	{\r
	34	/**\r
	35	* @type bool\r
	36	*/\r
	37	public $allow_empty = false;\r
	38	\r
	39	/**\r
	40	* @type string\r
	41	*/\r
	42	public $type = 'table';\r
	43	\r
	44	/**\r
	45	* @type array\r
	46	*/\r
	47	public $elements = array(\r
	48	'tr' => true,\r
	49	'tbody' => true,\r
	50	'thead' => true,\r
	51	'tfoot' => true,\r
	52	'caption' => true,\r
	53	'colgroup' => true,\r
	54	'col' => true\r
	55	);\r
	56	\r
	57	public function __construct()\r
	58	{\r
	59	}\r
	60	\r
	61	/**\r
	62	* @param array $children\r
	63	* @param HTMLPurifier_Config $config\r
	64	* @param HTMLPurifier_Context $context\r
65	* @return array\r
66	*/\r
67	public function validateChildren($children, $config, $context)\r
68	{\r
69	if (empty($children)) {\r
70	return false;\r
71	}\r
72	\r
73	// only one of these elements is allowed in a table\r
74	$caption = false;\r
75	$thead = false;\r
76	$tfoot = false;\r
77	\r
78	// whitespace\r
79	$initial_ws = array();\r
80	$after_caption_ws = array();\r
81	$after_thead_ws = array();\r
82	$after_tfoot_ws = array();\r
83	\r
84	// as many of these as you want\r
85	$cols = array();\r
86	$content = array();\r
87	\r
88	$tbody_mode = false; // if true, then we need to wrap any stray\r
89	// <tr>s with a <tbody>.\r
90	\r
91	$ws_accum =& $initial_ws;\r
92	\r
93	foreach ($children as $node) {\r
94	if ($node instanceof HTMLPurifier_Node_Comment) {\r
95	$ws_accum[] = $node;\r
96	continue;\r
97	}\r
98	switch ($node->name) {\r
99	case 'tbody':\r
100	$tbody_mode = true;\r
101	// fall through\r
102	case 'tr':\r
103	$content[] = $node;\r
104	$ws_accum =& $content;\r
105	break;\r
106	case 'caption':\r
107	// there can only be one caption!\r
108	if ($caption !== false) break;\r
109	$caption = $node;\r
110	$ws_accum =& $after_caption_ws;\r
111	break;\r
112	case 'thead':\r
113	$tbody_mode = true;\r
114	// XXX This breaks rendering properties with\r
115	// Firefox, which never floats a <thead> to\r
116	// the top. Ever. (Our scheme will float the\r
117	// first <thead> to the top.) So maybe\r
118	// <thead>s that are not first should be\r
119	// turned into <tbody>? Very tricky, indeed.\r
120	if ($thead === false) {\r
121	$thead = $node;\r
122	$ws_accum =& $after_thead_ws;\r
123	} else {\r
124	// Oops, there's a second one! What\r
125	// should we do? Current behavior is to\r
126	// transmutate the first and last entries into\r
127	// tbody tags, and then put into content.\r
128	// Maybe a better idea is to *attach\r
129	// it* to the existing thead or tfoot?\r
130	// We don't do this, because Firefox\r
131	// doesn't float an extra tfoot to the\r
132	// bottom like it does for the first one.\r
133	$node->name = 'tbody';\r
134	$content[] = $node;\r
135	$ws_accum =& $content;\r
136	}\r
137	break;\r
138	case 'tfoot':\r
139	// see above for some aveats\r
140	$tbody_mode = true;\r
141	if ($tfoot === false) {\r
142	$tfoot = $node;\r
143	$ws_accum =& $after_tfoot_ws;\r
144	} else {\r
145	$node->name = 'tbody';\r
146	$content[] = $node;\r
147	$ws_accum =& $content;\r
148	}\r
149	break;\r
150	case 'colgroup':\r
151	case 'col':\r
152	$cols[] = $node;\r
153	$ws_accum =& $cols;\r
154	break;\r
155	case '#PCDATA':\r
156	// How is whitespace handled? We treat is as sticky to\r
157	// the end of the previous element. So all of the\r
158	// nonsense we have worked on is to keep things\r
159	// together.\r
160	if (!empty($node->is_whitespace)) {\r
161	$ws_accum[] = $node;\r
162	}\r
163	break;\r
164	}\r
165	}\r
166	\r
167	if (empty($content)) {\r
168	return false;\r
169	}\r
170	\r
171	$ret = $initial_ws;\r
172	if ($caption !== false) {\r
173	$ret[] = $caption;\r
174	$ret = array_merge($ret, $after_caption_ws);\r
175	}\r
176	if ($cols !== false) {\r
177	$ret = array_merge($ret, $cols);\r
178	}\r
179	if ($thead !== false) {\r
180	$ret[] = $thead;\r
181	$ret = array_merge($ret, $after_thead_ws);\r
182	}\r
183	if ($tfoot !== false) {\r
184	$ret[] = $tfoot;\r
185	$ret = array_merge($ret, $after_tfoot_ws);\r
186	}\r
187	\r
188	if ($tbody_mode) {\r
189	// we have to shuffle tr into tbody\r
190	$current_tr_tbody = null;\r
191	\r
192	foreach($content as $node) {\r
193	switch ($node->name) {\r
194	case 'tbody':\r
195	$current_tr_tbody = null;\r
196	$ret[] = $node;\r
197	break;\r
198	case 'tr':\r
199	if ($current_tr_tbody === null) {\r
200	$current_tr_tbody = new HTMLPurifier_Node_Element('tbody');\r
201	$ret[] = $current_tr_tbody;\r
202	}\r
203	$current_tr_tbody->children[] = $node;\r
204	break;\r
205	case '#PCDATA':\r
206	assert($node->is_whitespace);\r
207	if ($current_tr_tbody === null) {\r
208	$ret[] = $node;\r
209	} else {\r
210	$current_tr_tbody->children[] = $node;\r
211	}\r
212	break;\r
213	}\r
214	}\r
215	} else {\r
216	$ret = array_merge($ret, $content);\r
217	}\r
218	\r
219	return $ret;\r
220	\r
221	}\r
222	}\r
223	\r
224	// vim: et sw=4 sts=4\r