]>
Commit | Line | Data |
---|---|---|
15c0b25d AP |
1 | // Copyright 2016 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
107c1cdb | 5 | package httpguts |
15c0b25d AP |
6 | |
7 | import ( | |
8 | "net" | |
9 | "strings" | |
10 | "unicode/utf8" | |
11 | ||
12 | "golang.org/x/net/idna" | |
13 | ) | |
14 | ||
// isTokenTable marks the ASCII characters that may appear in an HTTP
// token:
//
//	"!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
//	"^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
//
// The array has 127 entries, so valid indexes are 0 through 126; DEL
// (127) and everything above it lie outside the table.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true,
	'\'': true, '*': true, '+': true, '-': true, '.': true,
	'^': true, '_': true, '`': true, '|': true, '~': true,

	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, upper case.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// ALPHA, lower case.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// IsTokenRune reports whether r is a character allowed in an HTTP
// token, i.e. whether it has a true entry in isTokenTable. Any rune
// outside the table's index range, including a negative one, is not a
// token character.
func IsTokenRune(r rune) bool {
	// rune is a signed type, so the lower bound must be checked too:
	// indexing the table with a negative value would panic.
	if r < 0 || int(r) >= len(isTokenTable) {
		return false
	}
	return isTokenTable[r]
}

// isNotToken is the negation of IsTokenRune.
func isNotToken(r rune) bool {
	return !IsTokenRune(r)
}
103 | ||
104 | // HeaderValuesContainsToken reports whether any string in values | |
105 | // contains the provided token, ASCII case-insensitively. | |
106 | func HeaderValuesContainsToken(values []string, token string) bool { | |
107 | for _, v := range values { | |
108 | if headerValueContainsToken(v, token) { | |
109 | return true | |
110 | } | |
111 | } | |
112 | return false | |
113 | } | |
114 | ||
// isOWS reports whether b is an optional whitespace byte (a space or
// a horizontal tab), as defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
118 | ||
119 | // trimOWS returns x with all optional whitespace removes from the | |
120 | // beginning and end. | |
121 | func trimOWS(x string) string { | |
122 | // TODO: consider using strings.Trim(x, " \t") instead, | |
123 | // if and when it's fast enough. See issue 10292. | |
124 | // But this ASCII-only code will probably always beat UTF-8 | |
125 | // aware code. | |
126 | for len(x) > 0 && isOWS(x[0]) { | |
127 | x = x[1:] | |
128 | } | |
129 | for len(x) > 0 && isOWS(x[len(x)-1]) { | |
130 | x = x[:len(x)-1] | |
131 | } | |
132 | return x | |
133 | } | |
134 | ||
135 | // headerValueContainsToken reports whether v (assumed to be a | |
136 | // 0#element, in the ABNF extension described in RFC 7230 section 7) | |
137 | // contains token amongst its comma-separated tokens, ASCII | |
138 | // case-insensitively. | |
139 | func headerValueContainsToken(v string, token string) bool { | |
140 | v = trimOWS(v) | |
141 | if comma := strings.IndexByte(v, ','); comma != -1 { | |
142 | return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) | |
143 | } | |
144 | return tokenEqual(v, token) | |
145 | } | |
146 | ||
// lowerASCII returns the ASCII lowercase version of b. Bytes outside
// 'A'..'Z' are returned unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	const caseOffset = 'a' - 'A'
	return b + caseOffset
}
154 | ||
155 | // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. | |
156 | func tokenEqual(t1, t2 string) bool { | |
157 | if len(t1) != len(t2) { | |
158 | return false | |
159 | } | |
160 | for i, b := range t1 { | |
161 | if b >= utf8.RuneSelf { | |
162 | // No UTF-8 or non-ASCII allowed in tokens. | |
163 | return false | |
164 | } | |
165 | if lowerASCII(byte(b)) != lowerASCII(t2[i]) { | |
166 | return false | |
167 | } | |
168 | } | |
169 | return true | |
170 | } | |
171 | ||
// isLWS reports whether b is linear white space, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
//
// Only the SP and HT bytes themselves are matched here.
func isLWS(b byte) bool {
	if b == '\t' {
		return true
	}
	return b == ' '
}
176 | ||
// isCTL reports whether b is a control byte, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		space = 0x20 // first byte after the 0-31 control range
		del   = 0x7f // DEL, also a CTL
	)
	if b == del {
		return true
	}
	return b < space
}
185 | ||
186 | // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. | |
187 | // HTTP/2 imposes the additional restriction that uppercase ASCII | |
188 | // letters are not allowed. | |
189 | // | |
190 | // RFC 7230 says: | |
191 | // header-field = field-name ":" OWS field-value OWS | |
192 | // field-name = token | |
193 | // token = 1*tchar | |
194 | // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / | |
195 | // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA | |
196 | func ValidHeaderFieldName(v string) bool { | |
197 | if len(v) == 0 { | |
198 | return false | |
199 | } | |
200 | for _, r := range v { | |
201 | if !IsTokenRune(r) { | |
202 | return false | |
203 | } | |
204 | } | |
205 | return true | |
206 | } | |
207 | ||
208 | // ValidHostHeader reports whether h is a valid host header. | |
209 | func ValidHostHeader(h string) bool { | |
210 | // The latest spec is actually this: | |
211 | // | |
212 | // http://tools.ietf.org/html/rfc7230#section-5.4 | |
213 | // Host = uri-host [ ":" port ] | |
214 | // | |
215 | // Where uri-host is: | |
216 | // http://tools.ietf.org/html/rfc3986#section-3.2.2 | |
217 | // | |
218 | // But we're going to be much more lenient for now and just | |
219 | // search for any byte that's not a valid byte in any of those | |
220 | // expressions. | |
221 | for i := 0; i < len(h); i++ { | |
222 | if !validHostByte[h[i]] { | |
223 | return false | |
224 | } | |
225 | } | |
226 | return true | |
227 | } | |
228 | ||
// validHostByte is the per-byte lookup table consulted by
// ValidHostHeader; see the comment inside that function for the
// rationale. An entry is true when the byte is accepted in a host
// header.
var validHostByte = [256]bool{
	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Upper-case letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Lower-case letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// unreserved
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
	'*': true, '+': true, ',': true, ';': true, '=': true,

	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true,
	']': true,
}
264 | ||
265 | // ValidHeaderFieldValue reports whether v is a valid "field-value" according to | |
266 | // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : | |
267 | // | |
268 | // message-header = field-name ":" [ field-value ] | |
269 | // field-value = *( field-content | LWS ) | |
270 | // field-content = <the OCTETs making up the field-value | |
271 | // and consisting of either *TEXT or combinations | |
272 | // of token, separators, and quoted-string> | |
273 | // | |
274 | // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : | |
275 | // | |
276 | // TEXT = <any OCTET except CTLs, | |
277 | // but including LWS> | |
278 | // LWS = [CRLF] 1*( SP | HT ) | |
279 | // CTL = <any US-ASCII control character | |
280 | // (octets 0 - 31) and DEL (127)> | |
281 | // | |
282 | // RFC 7230 says: | |
283 | // field-value = *( field-content / obs-fold ) | |
284 | // obj-fold = N/A to http2, and deprecated | |
285 | // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] | |
286 | // field-vchar = VCHAR / obs-text | |
287 | // obs-text = %x80-FF | |
288 | // VCHAR = "any visible [USASCII] character" | |
289 | // | |
290 | // http2 further says: "Similarly, HTTP/2 allows header field values | |
291 | // that are not valid. While most of the values that can be encoded | |
292 | // will not alter header field parsing, carriage return (CR, ASCII | |
293 | // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII | |
294 | // 0x0) might be exploited by an attacker if they are translated | |
295 | // verbatim. Any request or response that contains a character not | |
296 | // permitted in a header field value MUST be treated as malformed | |
297 | // (Section 8.1.2.6). Valid characters are defined by the | |
298 | // field-content ABNF rule in Section 3.2 of [RFC7230]." | |
299 | // | |
300 | // This function does not (yet?) properly handle the rejection of | |
301 | // strings that begin or end with SP or HTAB. | |
302 | func ValidHeaderFieldValue(v string) bool { | |
303 | for i := 0; i < len(v); i++ { | |
304 | b := v[i] | |
305 | if isCTL(b) && !isLWS(b) { | |
306 | return false | |
307 | } | |
308 | } | |
309 | return true | |
310 | } | |
311 | ||
// isASCII reports whether every byte of s is below utf8.RuneSelf,
// i.e. whether s consists solely of ASCII bytes. The empty string is
// ASCII.
func isASCII(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
320 | ||
321 | // PunycodeHostPort returns the IDNA Punycode version | |
322 | // of the provided "host" or "host:port" string. | |
323 | func PunycodeHostPort(v string) (string, error) { | |
324 | if isASCII(v) { | |
325 | return v, nil | |
326 | } | |
327 | ||
328 | host, port, err := net.SplitHostPort(v) | |
329 | if err != nil { | |
330 | // The input 'v' argument was just a "host" argument, | |
331 | // without a port. This error should not be returned | |
332 | // to the caller. | |
333 | host = v | |
334 | port = "" | |
335 | } | |
336 | host, err = idna.ToASCII(host) | |
337 | if err != nil { | |
338 | // Non-UTF-8? Not representable in Punycode, in any | |
339 | // case. | |
340 | return "", err | |
341 | } | |
342 | if port == "" { | |
343 | return host, nil | |
344 | } | |
345 | return net.JoinHostPort(host, port), nil | |
346 | } |