--- /dev/null
+<?php\r
+\r
+/**\r
+ * HTML Purifier's internal representation of a URI.\r
+ * @note\r
+ * Internal data-structures are completely escaped. If the data needs\r
+ * to be used in a non-URI context (which is very unlikely), be sure\r
+ * to decode it first. The URI may not necessarily be well-formed until\r
+ * validate() is called.\r
+ */\r
+class HTMLPurifier_URI\r
+{\r
+    /**\r
+     * Scheme component (e.g. "http"), lower-cased by the constructor.\r
+     * Null when the URI carries no scheme.\r
+     * @type string\r
+     */\r
+    public $scheme;\r
+\r
+    /**\r
+     * Userinfo component of the authority; percent-encoded by validate().\r
+     * @type string\r
+     */\r
+    public $userinfo;\r
+\r
+    /**\r
+     * Host component of the authority. validate() resets it to null when\r
+     * host validation fails.\r
+     * @type string\r
+     */\r
+    public $host;\r
+\r
+    /**\r
+     * Port component, cast to int by the constructor. validate() resets\r
+     * it to null when it lies outside 1-65535.\r
+     * @type int\r
+     */\r
+    public $port;\r
+\r
+    /**\r
+     * Path component; percent-encoded by validate().\r
+     * @type string\r
+     */\r
+    public $path;\r
+\r
+    /**\r
+     * Query component (without the leading "?"); null when absent.\r
+     * @type string\r
+     */\r
+    public $query;\r
+\r
+    /**\r
+     * Fragment component (without the leading "#"); null when absent.\r
+     * @type string\r
+     */\r
+    public $fragment;\r
+\r
+    /**\r
+     * Stores the URI components, normalizing the scheme to lower case and\r
+     * the port to an integer. Null values are stored as null.\r
+     * @param string $scheme\r
+     * @param string $userinfo\r
+     * @param string $host\r
+     * @param int $port\r
+     * @param string $path\r
+     * @param string $query\r
+     * @param string $fragment\r
+     * @note Automatically normalizes scheme and port\r
+     */\r
+    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)\r
+    {\r
+        // ctype_lower() lets an already lower-case scheme skip strtolower()\r
+        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);\r
+        $this->userinfo = $userinfo;\r
+        $this->host = $host;\r
+        $this->port = is_null($port) ? $port : (int)$port;\r
+        $this->path = $path;\r
+        $this->query = $query;\r
+        $this->fragment = $fragment;\r
+    }\r
+\r
+    /**\r
+     * Retrieves a scheme object corresponding to the URI's scheme/default\r
+     * @param HTMLPurifier_Config $config\r
+     * @param HTMLPurifier_Context $context\r
+     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for\r
+     *         validating this URI, or false when the registry has no\r
+     *         object for the scheme or the default scheme is unreadable\r
+     *         (the latter also raises an E_USER_WARNING)\r
+     */\r
+    public function getSchemeObj($config, $context)\r
+    {\r
+        $registry = HTMLPurifier_URISchemeRegistry::instance();\r
+        if ($this->scheme !== null) {\r
+            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);\r
+            if (!$scheme_obj) {\r
+                return false;\r
+            } // invalid scheme, clean it out\r
+        } else {\r
+            // no scheme: retrieve the default one\r
+            $def = $config->getDefinition('URI');\r
+            $scheme_obj = $def->getDefaultScheme($config, $context);\r
+            if (!$scheme_obj) {\r
+                // something funky happened to the default scheme object\r
+                trigger_error(\r
+                    'Default scheme object "' . $def->defaultScheme . '" was not readable',\r
+                    E_USER_WARNING\r
+                );\r
+                return false;\r
+            }\r
+        }\r
+        return $scheme_obj;\r
+    }\r
+\r
+    /**\r
+     * Generic validation method applicable for all schemes. May modify\r
+     * this URI in order to get it into a compliant form: an invalid host\r
+     * or out-of-range port is reset to null, a redundant default scheme\r
+     * is dropped, and userinfo, path, query and fragment are\r
+     * percent-encoded.\r
+     * @param HTMLPurifier_Config $config\r
+     * @param HTMLPurifier_Context $context\r
+     * @return bool True if validation/filtering succeeds, false if failure\r
+     *         (this implementation always returns true)\r
+     */\r
+    public function validate($config, $context)\r
+    {\r
+        // ABNF definitions from RFC 3986 (gen-delims are not needed here)\r
+        $chars_sub_delims = '!$&\'()*+,;=';\r
+        $chars_pchar = $chars_sub_delims . ':@';\r
+\r
+        // validate host\r
+        if (!is_null($this->host)) {\r
+            $host_def = new HTMLPurifier_AttrDef_URI_Host();\r
+            $this->host = $host_def->validate($this->host, $config, $context);\r
+            if ($this->host === false) {\r
+                $this->host = null;\r
+            }\r
+        }\r
+\r
+        // validate scheme\r
+        // NOTE: It's not appropriate to check whether or not this\r
+        // scheme is in our registry, since a URIFilter may convert a\r
+        // URI that we don't allow into one we do. So instead, we just\r
+        // check if the scheme can be dropped because there is no host\r
+        // and it is our default scheme.\r
+        // The parentheses spell out the grouping PHP already applies\r
+        // (&& binds tighter than ||), so behaviour is unchanged.\r
+        if ((!is_null($this->scheme) && is_null($this->host)) || $this->host === '') {\r
+            // support for relative paths is pretty abysmal when the\r
+            // scheme is present, so axe it when possible\r
+            $def = $config->getDefinition('URI');\r
+            if ($def->defaultScheme === $this->scheme) {\r
+                $this->scheme = null;\r
+            }\r
+        }\r
+\r
+        // validate username\r
+        if (!is_null($this->userinfo)) {\r
+            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');\r
+            $this->userinfo = $encoder->encode($this->userinfo);\r
+        }\r
+\r
+        // validate port\r
+        if (!is_null($this->port)) {\r
+            if ($this->port < 1 || $this->port > 65535) {\r
+                $this->port = null;\r
+            }\r
+        }\r
+\r
+        // validate path\r
+        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');\r
+        if (!is_null($this->host)) { // this catches $this->host === ''\r
+            // path-abempty (hier and relative)\r
+            // http://www.example.com/my/path\r
+            // //www.example.com/my/path (looks odd, but works, and\r
+            // recognized by most browsers)\r
+            // (this set is valid or invalid on a scheme by scheme\r
+            // basis, so we'll deal with it later)\r
+            // file:///my/path\r
+            // ///my/path\r
+            $this->path = $segments_encoder->encode($this->path);\r
+        } elseif ($this->path !== '') {\r
+            if ($this->path[0] === '/') {\r
+                // path-absolute (hier and relative)\r
+                // http:/my/path\r
+                // /my/path\r
+                if (strlen($this->path) >= 2 && $this->path[1] === '/') {\r
+                    // This could happen if the host gets stripped out\r
+                    // http://my/path\r
+                    // //my/path\r
+                    $this->path = '';\r
+                } else {\r
+                    $this->path = $segments_encoder->encode($this->path);\r
+                }\r
+            } elseif (!is_null($this->scheme)) {\r
+                // path-rootless (hier)\r
+                // http:my/path\r
+                // Short circuit evaluation means we don't need to check nz\r
+                $this->path = $segments_encoder->encode($this->path);\r
+            } else {\r
+                // path-noscheme (relative)\r
+                // my/path\r
+                // (once again, not checking nz)\r
+                // The first segment is encoded without ':' in the\r
+                // preserved set; everything from the first '/' onwards\r
+                // uses the regular segment encoder.\r
+                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');\r
+                $c = strpos($this->path, '/');\r
+                if ($c !== false) {\r
+                    $this->path =\r
+                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .\r
+                        $segments_encoder->encode(substr($this->path, $c));\r
+                } else {\r
+                    $this->path = $segment_nc_encoder->encode($this->path);\r
+                }\r
+            }\r
+        } else {\r
+            // path-empty (hier and relative)\r
+            $this->path = ''; // just to be safe\r
+        }\r
+\r
+        // qf = query and fragment\r
+        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');\r
+\r
+        if (!is_null($this->query)) {\r
+            $this->query = $qf_encoder->encode($this->query);\r
+        }\r
+\r
+        if (!is_null($this->fragment)) {\r
+            $this->fragment = $qf_encoder->encode($this->fragment);\r
+        }\r
+        return true;\r
+    }\r
+\r
+    /**\r
+     * Convert URI back to string by concatenating the stored components\r
+     * with their delimiters; null components are omitted.\r
+     * @return string URI appropriate for output\r
+     */\r
+    public function toString()\r
+    {\r
+        // reconstruct authority\r
+        $authority = null;\r
+        // there is a rendering difference between a null authority\r
+        // (http:foo-bar) and an empty string authority\r
+        // (http:///foo-bar).\r
+        if (!is_null($this->host)) {\r
+            $authority = '';\r
+            if (!is_null($this->userinfo)) {\r
+                $authority .= $this->userinfo . '@';\r
+            }\r
+            $authority .= $this->host;\r
+            if (!is_null($this->port)) {\r
+                $authority .= ':' . $this->port;\r
+            }\r
+        }\r
+\r
+        // Reconstruct the result\r
+        // One might wonder about parsing quirks from browsers after\r
+        // this reconstruction. Unfortunately, parsing behavior depends\r
+        // on what *scheme* was employed (file:///foo is handled *very*\r
+        // differently than http:///foo), so unfortunately we have to\r
+        // defer to the schemes to do the right thing.\r
+        $result = '';\r
+        if (!is_null($this->scheme)) {\r
+            $result .= $this->scheme . ':';\r
+        }\r
+        if (!is_null($authority)) {\r
+            $result .= '//' . $authority;\r
+        }\r
+        $result .= $this->path;\r
+        if (!is_null($this->query)) {\r
+            $result .= '?' . $this->query;\r
+        }\r
+        if (!is_null($this->fragment)) {\r
+            $result .= '#' . $this->fragment;\r
+        }\r
+\r
+        return $result;\r
+    }\r
+\r
+    /**\r
+     * Returns true if this URL might be considered a 'local' URL given\r
+     * the current context. This is true when the host is null, or\r
+     * when it matches the host supplied to the configuration.\r
+     *\r
+     * Note that this does not do any scheme checking, so it is mostly\r
+     * only appropriate for metadata that doesn't care about protocol\r
+     * security. isBenign is probably what you actually want.\r
+     * @param HTMLPurifier_Config $config\r
+     * @param HTMLPurifier_Context $context\r
+     * @return bool\r
+     */\r
+    public function isLocal($config, $context)\r
+    {\r
+        if ($this->host === null) {\r
+            return true;\r
+        }\r
+        $uri_def = $config->getDefinition('URI');\r
+        if ($uri_def->host === $this->host) {\r
+            return true;\r
+        }\r
+        return false;\r
+    }\r
+\r
+    /**\r
+     * Returns true if this URL should be considered a 'benign' URL,\r
+     * that is:\r
+     *\r
+     * - It is a local URL (isLocal), and\r
+     * - It has an equal or better level of security\r
+     *\r
+     * Returns false when no scheme object can be obtained for this URI.\r
+     * @param HTMLPurifier_Config $config\r
+     * @param HTMLPurifier_Context $context\r
+     * @return bool\r
+     */\r
+    public function isBenign($config, $context)\r
+    {\r
+        if (!$this->isLocal($config, $context)) {\r
+            return false;\r
+        }\r
+\r
+        $scheme_obj = $this->getSchemeObj($config, $context);\r
+        if (!$scheme_obj) {\r
+            return false;\r
+        } // conservative approach\r
+\r
+        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);\r
+        // getDefaultScheme() may yield a falsy value (getSchemeObj()\r
+        // guards the same call), so only read ->secure from a real\r
+        // object. With no usable default scheme the outcome stays true,\r
+        // as before, but without a property read on a non-object.\r
+        if ($current_scheme_obj && $current_scheme_obj->secure) {\r
+            if (!$scheme_obj->secure) {\r
+                return false;\r
+            }\r
+        }\r
+        return true;\r
+    }\r
+}\r
+\r
+// vim: et sw=4 sts=4\r