diff options
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef')
8 files changed, 790 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php new file mode 100644 index 00000000..f6b2f22e --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php | |||
@@ -0,0 +1,67 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Child definition that delegates to one of two definitions based on context.
 *
 * The del and ins tags are notable because they allow different types of
 * elements depending on whether they appear in a block or an inline context.
 * Chameleon supports this by holding one definition per context and handing
 * validation to the matching one. While this is somewhat generalized, it is
 * specifically intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Definition object consulted in an inline context. Usually stricter.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $inline;

    /**
     * Definition object consulted in a block context.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $block;

    /**
     * @type string
     */
    public $type = 'chameleon';

    /**
     * Builds both context-specific definitions and exposes the block
     * definition's element lookup as this definition's own.
     *
     * @param array $inline List of elements to allow when inline.
     * @param array $block List of elements to allow when block.
     */
    public function __construct($inline, $block)
    {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block = new HTMLPurifier_ChildDef_Optional($block);
        $this->elements = $this->block->elements;
    }

    /**
     * Validates children with the block definition when the context's
     * 'IsInline' value is strictly false, and with the inline definition
     * for every other value.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return mixed Whatever the selected definition's validateChildren() returns.
     */
    public function validateChildren($children, $config, $context)
    {
        $delegate = ($context->get('IsInline') === false) ? $this->block : $this->inline;

        return $delegate->validateChildren($children, $config, $context);
    }
}
66 | |||
67 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Custom.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Custom.php new file mode 100644 index 00000000..1047cd8e --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Custom.php | |||
@@ -0,0 +1,102 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Custom validation class, accepts DTD child definitions.
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'custom';

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD.
     * @type string
     */
    public $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex.
     * @type string
     */
    private $_pcre_regex;

    /**
     * Stores the DTD pattern and immediately compiles it to a PCRE pattern.
     *
     * @param string $dtd_regex Allowed child pattern from the DTD
     */
    public function __construct($dtd_regex)
    {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     * and records every element name found in the pattern in $this->elements.
     */
    protected function _compileRegex()
    {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Square-bracket offset access: the curly-brace form ($raw{0}) is
        // deprecated in PHP 7.4 and a parse error in PHP 8.0. The empty-string
        // guard keeps the historical outcome ("()") without reading an
        // out-of-range offset.
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // NOTE: the original author flagged the transformation below as
        // complicated and possibly buggy; please report problems.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-parenthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Checks the sequence of non-whitespace child names against the compiled
     * pattern.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the comma-joined child names match the pattern.
     */
    public function validateChildren($children, $config, $context)
    {
        $list_of_children = '';
        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                continue;
            }
            $list_of_children .= $node->name . ',';
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?' . $this->_pcre_regex . '$/',
                $list_of_children
            );
        return (bool)$okay;
    }
}
101 | |||
102 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Empty.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Empty.php new file mode 100644 index 00000000..bbcde56e --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Empty.php | |||
@@ -0,0 +1,38 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition that disallows all elements.
 *
 * @warning Per the original author's note, validateChildren() in this class
 *          is actually never called, because empty elements are corrected in
 *          HTMLPurifier_Strategy_MakeWellFormed before child definitions are
 *          parsed in earnest by HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'empty';

    /**
     * Nothing to configure: this definition carries no element list.
     */
    public function __construct()
    {
    }

    /**
     * Always yields an empty child list, whatever children are passed in.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array Always an empty array.
     */
    public function validateChildren($children, $config, $context)
    {
        $no_children = array();

        return $no_children;
    }
}
37 | |||
38 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/List.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/List.php new file mode 100644 index 00000000..5cd76a1a --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/List.php | |||
@@ -0,0 +1,86 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition for list containers ul and ol.
 *
 * What does this do? The big thing is to handle ol/ul at the top
 * level of list nodes, which should be handled specially by /folding/
 * them into the previous list node. We generally shouldn't ever
 * see other disallowed elements, because the autoclose behavior
 * in MakeWellFormed handles it.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'list';

    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    /**
     * @type array
     */
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Flag reset to false at the start of every validateChildren() call.
     *
     * Declared explicitly because validateChildren() assigns it and no
     * declaration is visible for this class; assigning an undeclared
     * property creates a dynamic property, which PHP 8.2 deprecates.
     * NOTE(review): confirm the parent class does not already declare it.
     *
     * @type bool
     */
    public $whitespace = false;

    /**
     * Keeps li children and whitespace as they are, and tucks every other
     * child into the most recent li (creating a fresh li when none has been
     * seen yet).
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The new child list, or false when there are no
     *                    children or they are all whitespace.
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // the new set of children
        $result = array();

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // most recent li that stray non-li children get attached to
        $current_li = false;

        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            if ($node->name === 'li') {
                // good
                $current_li = $node;
                $result[] = $node;
            } else {
                // we want to tuck this into the previous li
                // Invariant: we expect the node to be ol/ul
                // ToDo: Make this more robust in the case of not ol/ul
                // by distinguishing between existing li and li created
                // to handle non-list elements; non-list elements should
                // not be appended to an existing li; only li created
                // for non-list. This distinction is not currently made.
                if ($current_li === false) {
                    $current_li = new HTMLPurifier_Node_Element('li');
                    $result[] = $current_li;
                }
                $current_li->children[] = $node;
                $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
            }
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        return $result;
    }
}
85 | |||
86 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Optional.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Optional.php new file mode 100644 index 00000000..1db864d9 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Optional.php | |||
@@ -0,0 +1,45 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition that allows a set of elements, and also allows having no
 * children at all.
 *
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required;
 *       really, one shouldn't inherit from the other. The only altered
 *       behavior is that a false returned by the parent is replaced with
 *       another value, so this class never returns false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'optional';

    /**
     * Runs the parent validation and substitutes a non-false value whenever
     * the parent rejects the children.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The parent's result when it is not false; otherwise
     *                    true for an empty child list, the untouched children
     *                    when the whitespace flag is set, or an empty array.
     */
    public function validateChildren($children, $config, $context)
    {
        $validated = parent::validateChildren($children, $config, $context);
        if ($validated !== false) {
            return $validated;
        }

        // we assume that $children is not modified
        if (empty($children)) {
            return true;
        }

        return $this->whitespace ? $children : array();
    }
}
44 | |||
45 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Required.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Required.php new file mode 100644 index 00000000..f6b8e8a2 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Required.php | |||
@@ -0,0 +1,118 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition that allows a set of elements, but disallows empty children.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     * @type array
     */
    public $elements = array();

    /**
     * Whether or not the last passed node was all whitespace.
     * @type bool
     */
    protected $whitespace = false;

    /**
     * Accepts a pipe-separated string, a plain list of names, or a ready-made
     * lookup table, and stores the result as a name => true lookup.
     *
     * @param array|string $elements List of allowed element names (lowercase).
     */
    public function __construct($elements)
    {
        if (is_string($elements)) {
            $elements = explode('|', str_replace(' ', '', $elements));
        }

        // A plain list has keys 0..n-1; only then are the values the names.
        $keys = array_keys($elements);
        if ($keys == array_keys($keys)) {
            $lookup = array();
            foreach (array_flip($elements) as $name => $position) {
                if (empty($name)) {
                    continue; // remove blank
                }
                $lookup[$name] = true;
            }
            $elements = $lookup;
        }

        $this->elements = $elements;
    }

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'required';

    /**
     * Filters the children down to allowed ones, spilling the contents of
     * disallowed elements into their place.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The filtered children, or false when there were no
     *                    children, nothing survived, or only whitespace did.
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // whether or not parsed character data is allowed
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // the new set of children
        $kept = array();

        // a little sanity check to make sure it's not ALL whitespace
        $only_whitespace = true;

        // Work stack; reversed so that popping yields document order.
        $pending = array_reverse($children);
        while (!empty($pending)) {
            $node = array_pop($pending);

            if (!empty($node->is_whitespace)) {
                $kept[] = $node;
                continue;
            }
            $only_whitespace = false; // phew, we're not talking about whitespace

            if (isset($this->elements[$node->name])) {
                $kept[] = $node;
                continue;
            }

            // special case text
            // XXX One of these ought to be redundant or something
            if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) {
                $kept[] = $node;
                continue;
            }

            // spill the child contents in
            // ToDo: Make configurable
            if ($node instanceof HTMLPurifier_Node_Element) {
                // push last child first so the first child is popped next
                $i = count($node->children);
                while ($i-- > 0) {
                    $pending[] = $node->children[$i];
                }
            }
            // anything else is dropped
        }

        if (empty($kept)) {
            return false;
        }
        if ($only_whitespace) {
            $this->whitespace = true;
            return false;
        }
        return $kept;
    }
}
117 | |||
118 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php new file mode 100644 index 00000000..38bf9533 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php | |||
@@ -0,0 +1,110 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Takes the contents of blockquote when in strict and reformats for validation.
 */
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
    /**
     * The element lookup this definition was configured with.
     * @type array
     */
    protected $real_elements;

    /**
     * The wider lookup (the 'Flow' content set plus #PCDATA) handed to the
     * parent during validation.
     * @type array
     */
    protected $fake_elements;

    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'strictblockquote';

    /**
     * Whether the real/fake element lookups have been set up yet.
     * @type bool
     */
    protected $init = false;

    /**
     * @param HTMLPurifier_Config $config
     * @return array
     * @note We don't want MakeWellFormed to auto-close inline elements since
     *       they might be allowed.
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->fake_elements;
    }

    /**
     * Validates with the wider lookup, then groups runs of non-whitespace
     * text and elements outside the real lookup into block wrapper elements.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // trick the parent class into thinking it allows more
        $this->elements = $this->fake_elements;
        $validated = parent::validateChildren($children, $config, $context);
        $this->elements = $this->real_elements;

        if ($validated === false) {
            return array();
        }
        if ($validated === true) {
            $validated = $children;
        }

        $wrapper_name = $config->getHTMLDefinition()->info_block_wrapper;
        $wrapper = false; // wrapper currently being filled, if any
        $output = array();

        foreach ($validated as $node) {
            $is_element = $node instanceof HTMLPurifier_Node_Element;
            $is_real_element = $is_element && isset($this->elements[$node->name]);

            if ($wrapper === false) {
                $is_visible_text = $node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace;
                if ($is_visible_text || ($is_element && !$is_real_element)) {
                    // start a new wrapper for this run
                    $wrapper = new HTMLPurifier_Node_Element($wrapper_name);
                    $output[] = $wrapper;
                }
            } elseif ($is_real_element) {
                // an element from the real lookup ends the current run
                $wrapper = false;
            }

            if ($wrapper) {
                $wrapper->children[] = $node;
            } else {
                $output[] = $node;
            }
        }

        return $output;
    }

    /**
     * Sets up the real and fake element lookups on first use.
     *
     * @param HTMLPurifier_Config $config
     */
    private function init($config)
    {
        if ($this->init) {
            return;
        }

        $definition = $config->getHTMLDefinition();
        // allow all inline elements
        $this->real_elements = $this->elements;
        $this->fake_elements = $definition->info_content_sets['Flow'];
        $this->fake_elements['#PCDATA'] = true;
        $this->init = true;
    }
}
109 | |||
110 | // vim: et sw=4 sts=4 | ||
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php new file mode 100644 index 00000000..0570c8b8 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/ChildDef/Table.php | |||
@@ -0,0 +1,224 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Definition for tables. The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong. The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, having just tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children into caption, cols, thead, tfoot and body content,
     * then reassembles them in that order. Whitespace and comments stay
     * attached to the section that preceded them.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The reordered children, or false when there are no
     *                    children or no tr/tbody content was found.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // Reference to whichever bucket should receive the next run of
        // whitespace/comments; rebound every time a section is seen.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever. (Our scheme will float the
                    // first <thead> to the top.) So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do? Current behavior is to
                        // transmutate the first and last entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (never false), so the former
        // "$cols !== false" guard was always true; merging an empty array
        // is a no-op, so merge unconditionally.
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            // NOTE(review): comment nodes collected into $content above match
            // none of the cases below and so appear to be left out of the
            // result in this mode — confirm whether that is intended.
            foreach ($content as $node) {
                switch ($node->name) {
                    case 'tbody':
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;

    }
}
223 | |||
224 | // vim: et sw=4 sts=4 | ||