]>
Commit | Line | Data |
---|---|---|
1 | <?php\r | |
2 | \r | |
/**
 * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * IPv6 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    /**
     * Creates the IPv4 and IPv6 sub-validators used by validate().
     */
    public function __construct()
    {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * Validates the host component of a URI.
     *
     * The checks are tried in this order:
     *  - the empty string is accepted and returned as-is;
     *  - a value wrapped in square brackets is handed to the IPv6
     *    sub-validator and returned re-wrapped in brackets;
     *  - anything the IPv4 sub-validator accepts is returned as that
     *    validator produced it;
     *  - an ASCII hostname matching the grammar below is returned unchanged;
     *  - if Core.EnableIDNA is set and Net_IDNA2 is available, labels that
     *    contain high bytes are punycoded and the result is re-checked
     *    against the same hostname grammar.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The validated host, or false if it is not acceptable.
     */
    public function validate($string, $config, $context)
    {
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }

        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
            // IPv6 literal: validate the part between the brackets.
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            if ($valid === false) {
                return false;
            }
            return '[' . $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) {
            return $ipv4;
        }

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode.

        // There is not a good sense in which underscores should be
        // allowed, since it's technically not! (And if you go as
        // far to allow everything as specified by the DNS spec...
        // well, that's literally everything, modulo some space limits
        // for the components and the overall name (which, by the way,
        // we are NOT checking!).  So we (arbitrarily) decide this:
        // let's allow underscores wherever we would have allowed
        // hyphens, if they are enabled.  This is a pretty good match
        // for browser behavior, for example, a large number of browsers
        // cannot handle foo_.example.com, but foo_bar.example.com is
        // fairly well supported.
        $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        // alphanum | "-" (hyphen kept last so it can never be read as a range)
        $and = "[a-z0-9{$underscore}-]";
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel = "$an($and*$an)?";
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel = "$a($and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        //
        // The D modifier (PCRE_DOLLAR_ENDONLY) makes "$" match only at the
        // very end of the subject; without it a host with a trailing
        // newline would be accepted and returned with the newline intact.
        $hostname = "/^({$domainlabel}\.)*{$toplabel}\.?$/iD";

        if (preg_match($hostname, $string)) {
            return $string;
        }

        // If we have Net_IDNA2 support, we can support IRIs by
        // punycoding them: the encoded result is re-checked against the
        // ASCII-only hostname pattern above before it is returned.
        // The class_exists() guard turns a missing optional dependency
        // into a validation failure instead of a fatal error.
        if ($config->get('Core.EnableIDNA') && class_exists('Net_IDNA2')) {
            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
            // we need to encode each period separately
            $parts = explode('.', $string);
            try {
                $new_parts = array();
                foreach ($parts as $part) {
                    // A label is sent to the encoder as soon as it contains
                    // a byte above 0x7a ('z'); that covers every non-ASCII
                    // byte as well as the ASCII bytes 0x7b-0x7f.
                    $encodable = false;
                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                        if (ord($part[$i]) > 0x7a) {
                            $encodable = true;
                            break;
                        }
                    }
                    $new_parts[] = $encodable ? $idna->encode($part) : $part;
                }
                $string = implode('.', $new_parts);
                if (preg_match($hostname, $string)) {
                    return $string;
                }
            } catch (Exception $e) {
                // XXX error reporting
                // Deliberate best-effort: an encoding failure simply means
                // the host is rejected below.
            }
        }
        return false;
    }
}
127 | \r | |
128 | // vim: et sw=4 sts=4\r |