]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / Injector / AutoParagraph.php
CommitLineData
d4949327
NL
1<?php\r
2\r
3/**\r
4 * Injector that auto paragraphs text in the root node based on\r
5 * double-spacing.\r
6 * @todo Ensure all states are unit tested, including variations as well.\r
7 * @todo Make a graph of the flow control for this Injector.\r
8 */\r
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
    /**
     * Identifier of this injector.
     * @type string
     */
    public $name = 'AutoParagraph';

    /**
     * Elements this injector needs; it emits <p> start/end tokens.
     * @type array
     */
    public $needed = array('p');

    /**
     * Builds a fresh <p> start token with the
     * 'MakeWellFormed_TagClosedError' armor flag set.
     * NOTE(review): the flag presumably keeps MakeWellFormed from reporting
     * an error when it closes this generated tag -- confirm against
     * the MakeWellFormed strategy.
     * @return HTMLPurifier_Token_Start
     */
    private function _pStart()
    {
        $start = new HTMLPurifier_Token_Start('p');
        $start->armor['MakeWellFormed_TagClosedError'] = true;
        return $start;
    }

    /**
     * Decides whether a text token has to be wrapped in, or split into,
     * paragraphs. The token is replaced in place (by reference) with an
     * array of tokens when a rewrite is needed, and left alone otherwise.
     * @param HTMLPurifier_Token_Text $token
     */
    public function handleText(&$token)
    {
        $text = $token->data;

        if (!$this->allowsElement('p')) {
            // The parent does not accept <p>. The only case we act on is
            // text sitting directly inside a <p>:
            //   State 3.1: ...<p>PAR1
            //   State 3.2: ...<p>PAR1\n\nPAR2
            // Anything else (State 4.x: ...<b>PAR1) is left untouched.
            if (empty($this->currentNesting)) {
                return;
            }
            $parent = $this->currentNesting[count($this->currentNesting) - 1];
            if ($parent->name == 'p') {
                // An empty result tells _splitText that no <p> was opened
                // by us for this text.
                $token = array();
                $this->_splitText($text, $token);
            }
            return;
        }

        $inRoot = empty($this->currentNesting);
        if (!$inRoot && strpos($text, "\n\n") === false) {
            // State 2: <div>PAR1...
            // Inside an element that accepts <p>, without a double newline
            // in this text: a paragraph is only needed if a double newline
            // shows up further ahead.
            if ($this->_pLookAhead()) {
                // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
                $token = array($this->_pStart(), $token);
            }
            // State 2.2.1: <div>PAR1<div>
            // State 2.2.2: <div>PAR1<b>PAR1</b></div>
            return;
        }

        // Either we are in the anonymous root node, or the text contains
        // a double newline; both are treated alike from here on.
        $i = null;
        $nesting = null;
        $hasNext = $this->forwardUntilEndToken($i, $current, $nesting);

        if ($token->is_whitespace) {
            if (!$hasNext) {
                // State 1.1: whitespace, then nothing more to look at.
                return;
            }
            if (!$this->_isInline($current)) {
                // State 1.5: \n<hr />
                return;
            }
        }

        // State 1.2: PAR1
        // State 1.3: PAR1\n\nPAR2
        // State 1.4: <div>PAR1\n\nPAR2
        $token = array($this->_pStart());
        $this->_splitText($text, $token);
    }

    /**
     * Decides whether an element token needs a <p> opened in front of it
     * (and, in the root node, a separating double newline). The token is
     * replaced in place (by reference) with an array when that is the case.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        // Per the original author's note, block tokens need no "already in
        // <p>" check, because MakeWellFormed would have closed that <p>.
        if (!$this->allowsElement('p')) {
            // State 2.2: <ul><li>
            // State 2.4: <p><b>
            return;
        }

        if (empty($this->currentNesting)) {
            // Root node.
            if ($this->_isInline($token)) {
                // State 3.1: <b>
                // The {p} is inserted here; it is not yet reflected in the
                // input tokens that backward() inspects below.
                $token = array($this->_pStart(), $token);
            }
            // State 3.2: <div> -- no paragraph needed.

            $i = null;
            $hasPrev = $this->backward($i, $prev);
            if ($hasPrev && !($prev instanceof HTMLPurifier_Token_Text)) {
                // State 3.1.1: ...</p>{p}<b>
                // State 3.2.1: ...</p><div>
                // Put a double newline in front of whatever we emit.
                $pieces = is_array($token) ? $token : array($token);
                array_unshift($pieces, new HTMLPurifier_Token_Text("\n\n"));
                $token = $pieces;
            }
            // State 3.1.2: ...</p>\n\n{p}<b>
            // State 3.2.2: ...</p>\n\n<div>
            return;
        }

        if (!$this->_isInline($token)) {
            // State 2.3: ...<div>
            return;
        }

        // State 1: <div>...<b>
        // Look at the preceding token to see whether this element directly
        // follows a start tag.
        $i = null;
        $this->backward($i, $prev);

        if ($prev instanceof HTMLPurifier_Token_Start) {
            // State 1.2.1: <div><b>
            //   1.3.1: <div><b>PAR1\n\nPAR2          -> needs <p>
            //   1.3.2: <div><b>PAR1</b></div>         -> no <p>
            //   1.3.3: <div><b>PAR1</b><div></div>\n\n</div> -> no <p>
            $needsParagraph = $this->_pLookAhead();
        } else {
            // State 1.1.4: <div><p>PAR1</p>\n\n<b>     -> needs <p>
            // State 1.1.1: <div><p>PAR1</p><b>         -> no <p>
            // State 1.1.2: <div><br /><b>              -> no <p>
            // State 1.1.3: <div>PAR<b>                 -> no <p>
            $needsParagraph = $prev instanceof HTMLPurifier_Token_Text
                && substr($prev->data, -2) === "\n\n";
        }

        if ($needsParagraph) {
            $token = array($this->_pStart(), $token);
        }
    }

    /**
     * Cuts text at double newlines and appends the resulting text tokens,
     * separated by </p>, "\n\n" and <p> tokens, to $result.
     *
     * Whether $result is empty on entry matters: an empty $result means the
     * caller did not open a <p> for this text, so a leading double newline
     * closes the paragraph we are already in.
     *
     * @param string $data Text to be broken into paragraphs
     * @param HTMLPurifier_Token[] $result Token list (by reference) that
     *              the generated tokens are added to
     */
    private function _splitText($data, &$result)
    {
        $chunks = explode("\n\n", $data);
        $lastIndex = count($chunks) - 1;

        if ($lastIndex === 0) {
            // No double newline at all; pass the text through unchanged.
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }

        // Only a blank first or last chunk carries meaning; blank chunks in
        // the middle are simply dropped. $lastIndex is at least 1 here, so
        // the two never refer to the same chunk.
        $leadingBreak = trim($chunks[0]) === '';
        $trailingBreak = trim($chunks[$lastIndex]) === '';

        $reopenParagraph = false;
        if ($leadingBreak) {
            if (empty($result)) {
                // We were already inside a paragraph: close it now. The
                // matching start tag is only emitted further down, and only
                // if real paragraph text follows.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                $reopenParagraph = true;
            } else {
                // The caller just opened a paragraph; keep the source's
                // double newline in front of it for presentation.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        }

        $bodies = array();
        foreach ($chunks as $chunk) {
            if (trim($chunk) !== '') {
                $bodies[] = $chunk;
            }
        }

        if (empty($bodies)) {
            // Nothing but whitespace.
            return;
        }

        if ($reopenParagraph) {
            $result[] = $this->_pStart();
        }

        $remaining = count($bodies);
        foreach ($bodies as $body) {
            $remaining--;
            $result[] = new HTMLPurifier_Token_Text($body);

            // Every paragraph but the last is closed explicitly. The last
            // one is closed only when the text ended in a double newline;
            // otherwise it is left open to be closed later.
            if ($remaining > 0 || $trailingBreak) {
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
            }

            // A new <p> is opened only between paragraphs, never after the
            // final one.
            if ($remaining > 0) {
                $result[] = $this->_pStart();
            }
        }
    }

    /**
     * Tells whether the token's name is among the child elements allowed
     * by the definition of <p>, i.e. whether it may appear inside a paragraph.
     * @param HTMLPurifier_Token $token
     * @return bool
     */
    private function _isInline($token)
    {
        // Kept as a single isset() so that any missing link in the chain
        // simply yields false.
        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
    }

    /**
     * Scans the upcoming tokens to find out whether a <p> has to be
     * inserted at the current position.
     * @return bool True if a later text token contains a double newline
     *              before a non-inline start tag or the end of the scan
     */
    private function _pLookAhead()
    {
        // When standing on a start tag, its own end tag must not stop the scan.
        $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;

        $i = null;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
            $verdict = $this->_checkNeedsP($current);
            if ($verdict !== null) {
                return $verdict;
            }
        }

        return false;
    }

    /**
     * Classifies one looked-ahead token for _pLookAhead(); meant to be
     * used together with forwardUntilEndToken().
     * @param HTMLPurifier_Token $current
     * @return bool|null True if a paragraph is needed, false if it is
     *              definitely not, null if this token does not decide it
     */
    private function _checkNeedsP($current)
    {
        if ($current instanceof HTMLPurifier_Token_Start) {
            // <div>PAR1<div>: a non-inline start tag ends the search;
            // an inline one is inconclusive.
            return $this->_isInline($current) ? null : false;
        }

        if ($current instanceof HTMLPurifier_Token_Text
            && strpos($current->data, "\n\n") !== false
        ) {
            // <div>PAR1<b>PAR1\n\nPAR2
            return true;
        }

        // <div>PAR1<b>PAR1... or any other kind of token: keep looking.
        return null;
    }
}
355\r
356// vim: et sw=4 sts=4\r