+ $tokens + ); + return $tokens; + } + + /** + * Iterative function that tokenizes a node, putting it into an accumulator. + * To iterate is human, to recurse divine - L. Peter Deutsch + * @param DOMNode $node DOMNode to be tokenized. + * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens. + * @return HTMLPurifier_Token of node appended to previously passed tokens. + */ + protected function tokenizeDOM($node, &$tokens) + { + $level = 0; + $nodes = array($level => new HTMLPurifier_Queue(array($node))); + $closingNodes = array(); + do { + while (!$nodes[$level]->isEmpty()) { + $node = $nodes[$level]->shift(); // FIFO + $collect = $level > 0 ? true : false; + $needEndingTag = $this->createStartNode($node, $tokens, $collect); + if ($needEndingTag) { + $closingNodes[$level][] = $node; + } + if ($node->childNodes && $node->childNodes->length) { + $level++; + $nodes[$level] = new HTMLPurifier_Queue(); + foreach ($node->childNodes as $childNode) { + $nodes[$level]->push($childNode); + } + } + } + $level--; + if ($level && isset($closingNodes[$level])) { + while ($node = array_pop($closingNodes[$level])) { + $this->createEndNode($node, $tokens); + } + } + } while ($level > 0); + } + + /** + * @param DOMNode $node DOMNode to be tokenized. + * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens. + * @param bool $collect Says whether or start and close are collected, set to + * false at first recursion because it's the implicit DIV + * tag you're dealing with. + * @return bool if the token needs an endtoken + * @todo data and tagName properties don't seem to exist in DOMNode? + */ + protected function createStartNode($node, &$tokens, $collect) + { + // intercept non element nodes. WE MUST catch all of them, + // but we're not getting the character reference nodes because + // those should have been preprocessed + if ($node->nodeType === XML_TEXT_NODE) { + $tokens[] = $this->factory->createText($node->data); + return false; + } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { + // undo libxml's special treatment of