]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/Generator.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / Generator.php
CommitLineData
d4949327
NL
1<?php\r
2\r
/**
 * Generates HTML from tokens.
 *
 * The generator caches a handful of configuration directives when it is
 * constructed and then serializes HTMLPurifier_Token objects back into
 * an HTML string.
 *
 * @todo Refactor interface so that configuration/context is determined
 *       upon instantiation, no need for messy generateFromTokens() calls
 * @todo Make some of the more internal functions protected, and have
 *       unit tests work around that
 */
class HTMLPurifier_Generator
{

    /**
     * Whether or not generator should produce XML output.
     * @type bool
     */
    private $_xhtml = true;

    /**
     * :HACK: Whether or not generator should comment the insides of <script> tags.
     * @type bool
     */
    private $_scriptFix = false;

    /**
     * Cache of HTMLDefinition during HTML output to determine whether or
     * not attributes should be minimized.
     * @type HTMLPurifier_HTMLDefinition
     */
    private $_def;

    /**
     * Cache of %Output.SortAttr.
     * @type bool
     */
    private $_sortAttr;

    /**
     * Cache of %Output.FlashCompat.
     * @type bool
     */
    private $_flashCompat;

    /**
     * Cache of %Output.FixInnerHTML.
     * @type bool
     */
    private $_innerHTMLFix;

    /**
     * Stack for keeping track of object information when outputting IE
     * compatibility code. One entry is pushed per open <object> element
     * and popped again when the matching end tag is generated.
     * @type array
     */
    private $_flashStack = array();

    /**
     * Configuration for the generator
     * @type HTMLPurifier_Config
     */
    protected $config;

    /**
     * Caches the output-related directives and the HTML definition.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context Accepted for interface
     *        compatibility; not read by the constructor.
     */
    public function __construct($config, $context)
    {
        $this->config = $config;
        $this->_scriptFix = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr = $config->get('Output.SortAttr');
        $this->_flashCompat = $config->get('Output.FlashCompat');
        $this->_def = $config->getHTMLDefinition();
        $this->_xhtml = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     *
     * After serializing the tokens, the result is optionally run through
     * Tidy (%Output.TidyFormat, only when the extension is loaded) and
     * has its newlines normalized (%Core.NormalizeNewlines).
     *
     * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
     * @return string Generated HTML
     */
    public function generateFromTokens($tokens)
    {
        if (!$tokens) {
            return '';
        }

        // Basic algorithm
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            // Script special case: the contents of the script block must
            // be ONE token for this to work. The token at $i must be a
            // *start* tag; matching on the name alone would also fire on
            // the </script> end tag and wrap whatever text follows it.
            if ($this->_scriptFix
                && $tokens[$i] instanceof HTMLPurifier_Token_Start
                && $tokens[$i]->name === 'script'
                && $i + 2 < $size
                && $tokens[$i + 2] instanceof HTMLPurifier_Token_End
            ) {
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
                // $i now points at the end tag, emitted just below
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
            $tidy = new Tidy();
            $tidy->parseString(
                $html,
                array(
                    'indent' => true,
                    'output-xhtml' => $this->_xhtml,
                    'show-body-only' => true,
                    'indent-spaces' => 2,
                    'wrap' => 68,
                ),
                'utf8'
            );
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        if ($this->config->get('Core.NormalizeNewlines')) {
            $nl = $this->config->get('Output.Newline');
            if ($nl === null) {
                $nl = PHP_EOL;
            }
            if ($nl !== "\n") {
                $html = str_replace("\n", $nl, $html);
            }
        }
        return $html;
    }

    /**
     * Generates HTML from a single token.
     *
     * Anything that is not an HTMLPurifier_Token raises an E_USER_WARNING
     * and yields an empty string; unrecognized token subclasses silently
     * yield an empty string.
     *
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string Generated HTML
     */
    public function generateFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';
        }

        if ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat && $token->name === 'object') {
                // remember the open <object> so nested <param>s can be
                // associated with it
                $flash = new stdClass();
                $flash->attr = $token->attr;
                $flash->param = array();
                $this->_flashStack[] = $flash;
            }
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
        }

        if ($token instanceof HTMLPurifier_Token_End) {
            $_extra = '';
            if ($this->_flashCompat && $token->name === 'object' && !empty($this->_flashStack)) {
                // No compatibility markup is emitted for now, but the
                // finished object has to leave the stack; otherwise later
                // <param> tokens would be attributed to a closed element
                // and the stack would grow without bound.
                array_pop($this->_flashStack);
            }
            return $_extra . '</' . $token->name . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Empty) {
            if ($this->_flashCompat
                && $token->name === 'param'
                && !empty($this->_flashStack)
                && isset($token->attr['name'])
            ) {
                // guard the lookups: a <param> lacking name/value must not
                // raise undefined-index notices
                $top = count($this->_flashStack) - 1;
                $this->_flashStack[$top]->param[$token->attr['name']] =
                    isset($token->attr['value']) ? $token->attr['value'] : null;
            }
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ($this->_xhtml ? ' /' : '') // <br /> v. <br>
                . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data, ENT_NOQUOTES);
        }

        if ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        }

        return '';
    }

    /**
     * Special case processor for the contents of script tags.
     *
     * Text tokens are wrapped in a comment/CDATA sandwich; any other
     * token is delegated to generateFromToken().
     *
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string
     * @warning This runs into problems if there's already a literal
     *          --> somewhere inside the script contents.
     */
    public function generateScriptFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token_Text) {
            return $this->generateFromToken($token);
        }
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        // Drop a trailing "//" so it does not double up with the closing
        // "//-->" marker appended below.
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates attribute declarations from attribute array.
     * @note This does not include the leading or trailing space.
     * @param array $assoc_array_of_attributes Attribute array
     * @param string $element Name of element attributes are for, used to check
     *        attribute minimization.
     * @return string Generated HTML fragment for insertion.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = '')
    {
        $html = '';
        if ($this->_sortAttr) {
            ksort($assoc_array_of_attributes);
        }
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) {
                    continue;
                }
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            // Workaround for Internet Explorer innerHTML bug.
            // Essentially, Internet Explorer, when calculating
            // innerHTML, omits quotes if there are no instances of
            // angled brackets, quotes or spaces.  However, when parsing
            // HTML (for example, when you assign to innerHTML), it
            // treats backticks as quotes.  Thus,
            //      <img alt="``" />
            // becomes
            //      <img alt=`` />
            // becomes
            //      <img alt='' />
            // Fortunately, all we need to do is trigger an appropriate
            // quoting style, which we do by adding an extra space.
            // This also is consistent with the W3C spec, which states
            // that user agents may ignore leading or trailing
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable).  Still, we have a configuration knob for
            // this, since this transformation is not necessary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix
                && strpos($value, '`') !== false
                // check if correct quoting style would not already be
                // triggered
                && strcspn($value, '"\' <>') === strlen($value)
            ) {
                // protect!
                $value .= ' ';
            }
            $html .= $key . '="' . $this->escape($value) . '" ';
        }
        return rtrim($html);
    }

    /**
     * Escapes raw text data.
     * @todo This really ought to be protected, but until we have a facility
     *       for properly generating HTML here w/o using tokens, it stays
     *       public.
     * @param string $string String data to escape for HTML.
     * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
     *        permissible for non-attribute output. Null selects ENT_COMPAT.
     * @return string escaped data.
     */
    public function escape($string, $quote = null)
    {
        // Workaround for APC bug on Mac Leopard reported by sidepodcast
        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
        // (the default is resolved here rather than in the signature)
        if ($quote === null) {
            $quote = ENT_COMPAT;
        }
        return htmlspecialchars($string, $quote, 'UTF-8');
    }
}
285\r
286// vim: et sw=4 sts=4\r