diff options
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php new file mode 100644 index 00000000..ca76ba6c --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php | |||
@@ -0,0 +1,286 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Generates HTML from tokens.
 *
 * The generator reads its settings from the configuration object once, in the
 * constructor, and then serializes HTMLPurifier_Token objects back into an
 * HTML string.
 *
 * @todo Refactor interface so that configuration/context is determined
 *       upon instantiation, no need for messy generateFromTokens() calls
 * @todo Make some of the more internal functions protected, and have
 *       unit tests work around that
 */
class HTMLPurifier_Generator
{

    /**
     * Whether or not generator should produce XML output.
     * Overwritten in the constructor from the HTML definition's doctype.
     * @type bool
     */
    private $_xhtml = true;

    /**
     * :HACK: Whether or not generator should comment the insides of <script> tags.
     * Cache of %Output.CommentScriptContents.
     * @type bool
     */
    private $_scriptFix = false;

    /**
     * Cache of HTMLDefinition during HTML output to determine whether or
     * not attributes should be minimized.
     * @type HTMLPurifier_HTMLDefinition
     */
    private $_def;

    /**
     * Cache of %Output.SortAttr.
     * @type bool
     */
    private $_sortAttr;

    /**
     * Cache of %Output.FlashCompat.
     * @type bool
     */
    private $_flashCompat;

    /**
     * Cache of %Output.FixInnerHTML.
     * @type bool
     */
    private $_innerHTMLFix;

    /**
     * Stack for keeping track of object information when outputting IE
     * compatibility code. One entry is pushed per open <object> element and
     * popped again when the matching end tag is generated.
     * @type array
     */
    private $_flashStack = array();

    /**
     * Configuration for the generator
     * @type HTMLPurifier_Config
     */
    protected $config;

    /**
     * Caches every configuration value the generator needs.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context Accepted for interface
     *        compatibility; not read by this constructor.
     */
    public function __construct($config, $context)
    {
        $this->config = $config;
        $this->_scriptFix = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr = $config->get('Output.SortAttr');
        $this->_flashCompat = $config->get('Output.FlashCompat');
        $this->_def = $config->getHTMLDefinition();
        $this->_xhtml = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     *
     * After serializing the tokens, the output is optionally reformatted
     * with the tidy extension (%Output.TidyFormat) and its newlines are
     * optionally converted (%Core.NormalizeNewlines / %Output.Newline).
     *
     * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
     * @return string Generated HTML
     */
    public function generateFromTokens($tokens)
    {
        if (!$tokens) {
            return '';
        }

        // Basic algorithm
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            $current = $tokens[$i];
            // Script special case: the contents of the script block must be
            // ONE token for this to work, i.e. the sequence has to be
            // <script> start, contents, end tag. Only a START token may
            // trigger it; matching on the name alone would also fire on
            // </script> and wrap whatever text follows the script element.
            if ($this->_scriptFix
                && $current instanceof HTMLPurifier_Token_Start
                && $current->name === 'script'
                && $i + 2 < $size
                && $tokens[$i + 2] instanceof HTMLPurifier_Token_End
            ) {
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
                // $i now points at the end tag, emitted just below.
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
            $tidy = new Tidy();
            $tidy->parseString(
                $html,
                array(
                    'indent' => true,
                    'output-xhtml' => $this->_xhtml,
                    'show-body-only' => true,
                    'indent-spaces' => 2,
                    'wrap' => 68,
                ),
                'utf8'
            );
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        if ($this->config->get('Core.NormalizeNewlines')) {
            $nl = $this->config->get('Output.Newline');
            if ($nl === null) {
                $nl = PHP_EOL;
            }
            if ($nl !== "\n") {
                $html = str_replace("\n", $nl, $html);
            }
        }
        return $html;
    }

    /**
     * Generates HTML from a single token.
     *
     * Raises an E_USER_WARNING and returns an empty string when given
     * something that is not an HTMLPurifier_Token. Token subclasses other
     * than start, end, empty, text and comment also yield an empty string.
     *
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string Generated HTML
     */
    public function generateFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';
        }

        if ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat && $token->name == 'object') {
                // Remember the open <object> so nested <param> tags can be
                // associated with it.
                $flash = new stdClass();
                $flash->attr = $token->attr;
                $flash->param = array();
                $this->_flashStack[] = $flash;
            }
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
        }

        if ($token instanceof HTMLPurifier_Token_End) {
            if ($this->_flashCompat && $token->name == 'object' && !empty($this->_flashStack)) {
                // No extra markup is emitted for now, but the entry pushed by
                // the start tag must be discarded so that later <param> tags
                // are not attributed to an object that is already closed.
                array_pop($this->_flashStack);
            }
            return '</' . $token->name . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Empty) {
            if ($this->_flashCompat
                && $token->name == 'param'
                && !empty($this->_flashStack)
                && isset($token->attr['name'], $token->attr['value'])
            ) {
                // Record the parameter on the innermost open <object>; a
                // <param> lacking name or value has nothing to record.
                $top = count($this->_flashStack) - 1;
                $this->_flashStack[$top]->param[$token->attr['name']] = $token->attr['value'];
            }
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ($this->_xhtml ? ' /' : '') // <br /> v. <br>
                . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Text) {
            // Quotes are harmless in text nodes, so they are left alone.
            return $this->escape($token->data, ENT_NOQUOTES);
        }

        if ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        }

        return '';
    }

    /**
     * Special case processor for the contents of script tags
     *
     * Text tokens are wrapped in a comment/CDATA construct; any other token
     * is handed to generateFromToken() unchanged.
     *
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string
     * @warning This runs into problems if there's already a literal
     *          --> somewhere inside the script contents.
     */
    public function generateScriptFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token_Text) {
            return $this->generateFromToken($token);
        }
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        // Drop a trailing "//" so it does not collide with the closing marker.
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates attribute declarations from attribute array.
     * @note This does not include the leading or trailing space.
     * @param array $assoc_array_of_attributes Attribute array
     * @param string $element Name of element attributes are for, used to check
     *        attribute minimization.
     * @return string Generated HTML fragment for insertion.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = '')
    {
        $html = '';
        if ($this->_sortAttr) {
            ksort($assoc_array_of_attributes);
        }
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) {
                    continue;
                }
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            // Workaround for Internet Explorer innerHTML bug.
            // Essentially, Internet Explorer, when calculating
            // innerHTML, omits quotes if there are no instances of
            // angled brackets, quotes or spaces. However, when parsing
            // HTML (for example, when you assign to innerHTML), it
            // treats backticks as quotes. Thus,
            //      <img alt="``" />
            // becomes
            //      <img alt=`` />
            // becomes
            //      <img alt='' />
            // Fortunately, all we need to do is trigger an appropriate
            // quoting style, which we do by adding an extra space.
            // This also is consistent with the W3C spec, which states
            // that user agents may ignore leading or trailing
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable). Still, we have a configuration knob for
            // this, since this transformation is not necessary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix && strpos($value, '`') !== false) {
                // check if correct quoting style would not already be
                // triggered
                if (strcspn($value, '"\' <>') === strlen($value)) {
                    // protect!
                    $value .= ' ';
                }
            }
            $html .= $key . '="' . $this->escape($value) . '" ';
        }
        return rtrim($html);
    }

    /**
     * Escapes raw text data.
     * @todo This really ought to be protected, but until we have a facility
     *       for properly generating HTML here w/o using tokens, it stays
     *       public.
     * @param string $string String data to escape for HTML.
     * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
     *        permissible for non-attribute output. Defaults to ENT_COMPAT
     *        when null.
     * @return string escaped data.
     */
    public function escape($string, $quote = null)
    {
        // Workaround for APC bug on Mac Leopard reported by sidepodcast
        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
        // (the default is resolved here rather than in the signature)
        if ($quote === null) {
            $quote = ENT_COMPAT;
        }
        return htmlspecialchars($string, $quote, 'UTF-8');
    }
}
285 | |||
286 | // vim: et sw=4 sts=4 | ||