From 42c80841c846610be280218d53fcde06b0f0063b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Fri, 6 Dec 2013 09:45:27 +0100 Subject: [change] we now use Full-Text RSS 3.1, thank you so much @fivefilters --- .../simplepie/library/SimplePie/Sanitize.php | 549 +++++++++++++++++++++ 1 file changed, 549 insertions(+) create mode 100644 inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php (limited to 'inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php') diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php new file mode 100644 index 00000000..6810cc49 --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php @@ -0,0 +1,549 @@ +set_url_replacements(null); + } + + public function remove_div($enable = true) + { + $this->remove_div = (bool) $enable; + } + + public function set_image_handler($page = false) + { + if ($page) + { + $this->image_handler = (string) $page; + } + else + { + $this->image_handler = false; + } + } + + public function set_registry(SimplePie_Registry $registry) + { + $this->registry = $registry; + } + + public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache') + { + if (isset($enable_cache)) + { + $this->enable_cache = (bool) $enable_cache; + } + + if ($cache_location) + { + $this->cache_location = (string) $cache_location; + } + + if ($cache_name_function) + { + $this->cache_name_function = (string) $cache_name_function; + } + } + + public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false) + { + if ($timeout) + { + $this->timeout = (string) $timeout; + } + + if ($useragent) + { + $this->useragent = (string) $useragent; + } + + if ($force_fsockopen) + { + $this->force_fsockopen = (string) $force_fsockopen; + } + } + + public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style')) + { + if ($tags) + { + if (is_array($tags)) + { + $this->strip_htmltags = $tags; + } + else + { + $this->strip_htmltags = explode(',', $tags); + } + } + else + { + $this->strip_htmltags = false; + } + } + + public function encode_instead_of_strip($encode = false) + { + $this->encode_instead_of_strip = (bool) $encode; + } + + public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) + { + if ($attribs) + { + if (is_array($attribs)) + { + $this->strip_attributes = $attribs; + } + else + { + $this->strip_attributes = explode(',', $attribs); + } + } + else + { + $this->strip_attributes = false; + } + } + + public function strip_comments($strip = false) + { + $this->strip_comments = (bool) $strip; + } + + public function set_output_encoding($encoding = 'UTF-8') + { + $this->output_encoding = (string) $encoding; + } + + /** + * Set element/attribute key/value pairs of HTML attributes + * containing URLs that need to be resolved relative to the feed + * + * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite, + * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite, + * |q|@cite + * + * @since 1.0 + * @param array|null $element_attribute Element/attribute key/value pairs, null for default + */ + public function set_url_replacements($element_attribute = null) + { + if ($element_attribute === null) + { + $element_attribute = array( + 'a' => 'href', + 'area' => 'href', + 'blockquote' => 'cite', + 'del' => 'cite', + 'form' => 'action', + 'img' => array( + 'longdesc', + 'src' + ), + 'input' => 'src', + 'ins' => 'cite', + 'q' => 'cite' + ); + } + $this->replace_url_attributes = (array) $element_attribute; + } + + public function sanitize($data, $type, $base = '') + { + $data = trim($data); + if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) + { + if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) + { + if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) + { + $type |= SIMPLEPIE_CONSTRUCT_HTML; + } + else + { + $type |= SIMPLEPIE_CONSTRUCT_TEXT; + } + } + + if ($type & SIMPLEPIE_CONSTRUCT_BASE64) + { + $data = base64_decode($data); + } + + if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) + { + + $document = new DOMDocument(); + $document->encoding = 'UTF-8'; + $data = $this->preprocess($data, $type); + + set_error_handler(array('SimplePie_Misc', 'silence_errors')); + $document->loadHTML($data); + restore_error_handler(); + + // Strip comments + if ($this->strip_comments) + { + $xpath = new DOMXPath($document); + $comments = $xpath->query('//comment()'); + + foreach ($comments as $comment) + { + $comment->parentNode->removeChild($comment); + } + } + + // Strip out HTML tags and attributes that might cause various security problems. + // Based on recommendations by Mark Pilgrim at: + // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely + if ($this->strip_htmltags) + { + foreach ($this->strip_htmltags as $tag) + { + $this->strip_tag($tag, $document, $type); + } + } + + if ($this->strip_attributes) + { + foreach ($this->strip_attributes as $attrib) + { + $this->strip_attr($attrib, $document); + } + } + + // Replace relative URLs + $this->base = $base; + foreach ($this->replace_url_attributes as $element => $attributes) + { + $this->replace_urls($document, $element, $attributes); + } + + // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. + if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) + { + $images = $document->getElementsByTagName('img'); + foreach ($images as $img) + { + if ($img->hasAttribute('src')) + { + $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src')); + $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi')); + + if ($cache->load()) + { + $img->setAttribute('src', $this->image_handler . $image_url); + } + else + { + $file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); + $headers = $file->headers; + + if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) + { + if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) + { + $img->setAttribute('src', $this->image_handler . $image_url); + } + else + { + trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); + } + } + } + } + } + } + + // Remove the DOCTYPE + // Seems to cause segfaulting if we don't do this + if ($document->firstChild instanceof DOMDocumentType) + { + $document->removeChild($document->firstChild); + } + + // Move everything from the body to the root + $real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0); + $document->replaceChild($real_body, $document->firstChild); + + // Finally, convert to a HTML string + $data = trim($document->saveHTML()); + + if ($this->remove_div) + { + $data = preg_replace('/^/', '', $data); + $data = preg_replace('/<\/div>$/', '', $data); + } + else + { + $data = preg_replace('/^/', '
', $data); + } + } + + if ($type & SIMPLEPIE_CONSTRUCT_IRI) + { + $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base)); + if ($absolute !== false) + { + $data = $absolute; + } + } + + if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) + { + $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8'); + } + + if ($this->output_encoding !== 'UTF-8') + { + $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding)); + } + } + return $data; + } + + protected function preprocess($html, $type) + { + $ret = ''; + if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) + { + // Atom XHTML constructs are wrapped with a div by default + // Note: No protection if $html contains a stray
! + $html = '
' . $html . '
'; + $ret .= ''; + $content_type = 'text/html'; + } + else + { + $ret .= ''; + $content_type = 'application/xhtml+xml'; + } + + $ret .= ''; + $ret .= ''; + $ret .= '' . $html . ''; + return $ret; + } + + public function replace_urls($document, $tag, $attributes) + { + if (!is_array($attributes)) + { + $attributes = array($attributes); + } + + if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) + { + $elements = $document->getElementsByTagName($tag); + foreach ($elements as $element) + { + foreach ($attributes as $attribute) + { + if ($element->hasAttribute($attribute)) + { + $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base)); + if ($value !== false) + { + $element->setAttribute($attribute, $value); + } + } + } + } + } + } + + public function do_strip_htmltags($match) + { + if ($this->encode_instead_of_strip) + { + if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) + { + $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8'); + $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8'); + return "<$match[1]$match[2]>$match[3]</$match[1]>"; + } + else + { + return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8'); + } + } + elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) + { + return $match[4]; + } + else + { + return ''; + } + } + + protected function strip_tag($tag, $document, $type) + { + $xpath = new DOMXPath($document); + $elements = $xpath->query('body//' . $tag); + if ($this->encode_instead_of_strip) + { + foreach ($elements as $element) + { + $fragment = $document->createDocumentFragment(); + + // For elements which aren't script or style, include the tag itself + if (!in_array($tag, array('script', 'style'))) + { + $text = '<' . $tag; + if ($element->hasAttributes()) + { + $attrs = array(); + foreach ($element->attributes as $name => $attr) + { + $value = $attr->value; + + // In XHTML, empty values should never exist, so we repeat the value + if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML)) + { + $value = $name; + } + // For HTML, empty is fine + elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML)) + { + $attrs[] = $name; + continue; + } + + // Standard attribute text + $attrs[] = $name . '="' . $attr->value . '"'; + } + $text .= ' ' . implode(' ', $attrs); + } + $text .= '>'; + $fragment->appendChild(new DOMText($text)); + } + + $number = $element->childNodes->length; + for ($i = $number; $i > 0; $i--) + { + $child = $element->childNodes->item(0); + $fragment->appendChild($child); + } + + if (!in_array($tag, array('script', 'style'))) + { + $fragment->appendChild(new DOMText('')); + } + + $element->parentNode->replaceChild($fragment, $element); + } + + return; + } + elseif (in_array($tag, array('script', 'style'))) + { + foreach ($elements as $element) + { + $element->parentNode->removeChild($element); + } + + return; + } + else + { + foreach ($elements as $element) + { + $fragment = $document->createDocumentFragment(); + $number = $element->childNodes->length; + for ($i = $number; $i > 0; $i--) + { + $child = $element->childNodes->item(0); + $fragment->appendChild($child); + } + + $element->parentNode->replaceChild($fragment, $element); + } + } + } + + protected function strip_attr($attrib, $document) + { + $xpath = new DOMXPath($document); + $elements = $xpath->query('//*[@' . $attrib . ']'); + + foreach ($elements as $element) + { + $element->removeAttribute($attrib); + } + } +} -- cgit v1.2.3