]>
Commit | Line | Data |
---|---|---|
07971ca3 AP |
1 | // Copyright 2012 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | // +build ignore | |
6 | ||
15c0b25d AP |
7 | //go:generate go run gen.go |
8 | //go:generate go run gen.go -test | |
07971ca3 | 9 | |
15c0b25d | 10 | package main |
07971ca3 AP |
11 | |
12 | import ( | |
15c0b25d | 13 | "bytes" |
07971ca3 AP |
14 | "flag" |
15 | "fmt" | |
15c0b25d AP |
16 | "go/format" |
17 | "io/ioutil" | |
07971ca3 AP |
18 | "math/rand" |
19 | "os" | |
20 | "sort" | |
21 | "strings" | |
22 | ) | |
23 | ||
24 | // identifier converts s to a Go exported identifier. | |
25 | // It converts "div" to "Div" and "accept-charset" to "AcceptCharset". | |
26 | func identifier(s string) string { | |
27 | b := make([]byte, 0, len(s)) | |
28 | cap := true | |
29 | for _, c := range s { | |
30 | if c == '-' { | |
31 | cap = true | |
32 | continue | |
33 | } | |
34 | if cap && 'a' <= c && c <= 'z' { | |
35 | c -= 'a' - 'A' | |
36 | } | |
37 | cap = false | |
38 | b = append(b, byte(c)) | |
39 | } | |
40 | return string(b) | |
41 | } | |
42 | ||
43 | var test = flag.Bool("test", false, "generate table_test.go") | |
44 | ||
15c0b25d AP |
45 | func genFile(name string, buf *bytes.Buffer) { |
46 | b, err := format.Source(buf.Bytes()) | |
47 | if err != nil { | |
48 | fmt.Fprintln(os.Stderr, err) | |
49 | os.Exit(1) | |
50 | } | |
51 | if err := ioutil.WriteFile(name, b, 0644); err != nil { | |
52 | fmt.Fprintln(os.Stderr, err) | |
53 | os.Exit(1) | |
54 | } | |
55 | } | |
56 | ||
07971ca3 AP |
57 | func main() { |
58 | flag.Parse() | |
59 | ||
60 | var all []string | |
61 | all = append(all, elements...) | |
62 | all = append(all, attributes...) | |
63 | all = append(all, eventHandlers...) | |
64 | all = append(all, extra...) | |
65 | sort.Strings(all) | |
66 | ||
07971ca3 | 67 | // uniq - lists have dups |
07971ca3 AP |
68 | w := 0 |
69 | for _, s := range all { | |
70 | if w == 0 || all[w-1] != s { | |
07971ca3 AP |
71 | all[w] = s |
72 | w++ | |
73 | } | |
74 | } | |
75 | all = all[:w] | |
76 | ||
15c0b25d AP |
77 | if *test { |
78 | var buf bytes.Buffer | |
79 | fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") | |
80 | fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") | |
81 | fmt.Fprintln(&buf, "package atom\n") | |
82 | fmt.Fprintln(&buf, "var testAtomList = []string{") | |
83 | for _, s := range all { | |
84 | fmt.Fprintf(&buf, "\t%q,\n", s) | |
85 | } | |
86 | fmt.Fprintln(&buf, "}") | |
87 | ||
88 | genFile("table_test.go", &buf) | |
89 | return | |
90 | } | |
91 | ||
07971ca3 AP |
92 | // Find hash that minimizes table size. |
93 | var best *table | |
94 | for i := 0; i < 1000000; i++ { | |
95 | if best != nil && 1<<(best.k-1) < len(all) { | |
96 | break | |
97 | } | |
98 | h := rand.Uint32() | |
99 | for k := uint(0); k <= 16; k++ { | |
100 | if best != nil && k >= best.k { | |
101 | break | |
102 | } | |
103 | var t table | |
104 | if t.init(h, k, all) { | |
105 | best = &t | |
106 | break | |
107 | } | |
108 | } | |
109 | } | |
110 | if best == nil { | |
111 | fmt.Fprintf(os.Stderr, "failed to construct string table\n") | |
112 | os.Exit(1) | |
113 | } | |
114 | ||
115 | // Lay out strings, using overlaps when possible. | |
116 | layout := append([]string{}, all...) | |
117 | ||
118 | // Remove strings that are substrings of other strings | |
119 | for changed := true; changed; { | |
120 | changed = false | |
121 | for i, s := range layout { | |
122 | if s == "" { | |
123 | continue | |
124 | } | |
125 | for j, t := range layout { | |
126 | if i != j && t != "" && strings.Contains(s, t) { | |
127 | changed = true | |
128 | layout[j] = "" | |
129 | } | |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | // Join strings where one suffix matches another prefix. | |
135 | for { | |
136 | // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], | |
137 | // maximizing overlap length k. | |
138 | besti := -1 | |
139 | bestj := -1 | |
140 | bestk := 0 | |
141 | for i, s := range layout { | |
142 | if s == "" { | |
143 | continue | |
144 | } | |
145 | for j, t := range layout { | |
146 | if i == j { | |
147 | continue | |
148 | } | |
149 | for k := bestk + 1; k <= len(s) && k <= len(t); k++ { | |
150 | if s[len(s)-k:] == t[:k] { | |
151 | besti = i | |
152 | bestj = j | |
153 | bestk = k | |
154 | } | |
155 | } | |
156 | } | |
157 | } | |
158 | if bestk > 0 { | |
159 | layout[besti] += layout[bestj][bestk:] | |
160 | layout[bestj] = "" | |
161 | continue | |
162 | } | |
163 | break | |
164 | } | |
165 | ||
166 | text := strings.Join(layout, "") | |
167 | ||
168 | atom := map[string]uint32{} | |
169 | for _, s := range all { | |
170 | off := strings.Index(text, s) | |
171 | if off < 0 { | |
172 | panic("lost string " + s) | |
173 | } | |
174 | atom[s] = uint32(off<<8 | len(s)) | |
175 | } | |
176 | ||
15c0b25d | 177 | var buf bytes.Buffer |
07971ca3 | 178 | // Generate the Go code. |
15c0b25d AP |
179 | fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") |
180 | fmt.Fprintln(&buf, "//go:generate go run gen.go\n") | |
181 | fmt.Fprintln(&buf, "package atom\n\nconst (") | |
182 | ||
183 | // compute max len | |
184 | maxLen := 0 | |
07971ca3 | 185 | for _, s := range all { |
15c0b25d AP |
186 | if maxLen < len(s) { |
187 | maxLen = len(s) | |
188 | } | |
189 | fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) | |
07971ca3 | 190 | } |
15c0b25d | 191 | fmt.Fprintln(&buf, ")\n") |
07971ca3 | 192 | |
15c0b25d AP |
193 | fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) |
194 | fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) | |
07971ca3 | 195 | |
15c0b25d | 196 | fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) |
07971ca3 AP |
197 | for i, s := range best.tab { |
198 | if s == "" { | |
199 | continue | |
200 | } | |
15c0b25d | 201 | fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) |
07971ca3 | 202 | } |
15c0b25d | 203 | fmt.Fprintf(&buf, "}\n") |
07971ca3 AP |
204 | datasize := (1 << best.k) * 4 |
205 | ||
15c0b25d | 206 | fmt.Fprintln(&buf, "const atomText =") |
07971ca3 AP |
207 | textsize := len(text) |
208 | for len(text) > 60 { | |
15c0b25d | 209 | fmt.Fprintf(&buf, "\t%q +\n", text[:60]) |
07971ca3 AP |
210 | text = text[60:] |
211 | } | |
15c0b25d AP |
212 | fmt.Fprintf(&buf, "\t%q\n\n", text) |
213 | ||
214 | genFile("table.go", &buf) | |
07971ca3 | 215 | |
15c0b25d | 216 | fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) |
07971ca3 AP |
217 | } |
218 | ||
219 | type byLen []string | |
220 | ||
221 | func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } | |
222 | func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } | |
223 | func (x byLen) Len() int { return len(x) } | |
224 | ||
225 | // fnv computes the FNV hash with an arbitrary starting value h. | |
226 | func fnv(h uint32, s string) uint32 { | |
227 | for i := 0; i < len(s); i++ { | |
228 | h ^= uint32(s[i]) | |
229 | h *= 16777619 | |
230 | } | |
231 | return h | |
232 | } | |
233 | ||
234 | // A table represents an attempt at constructing the lookup table. | |
235 | // The lookup table uses cuckoo hashing, meaning that each string | |
236 | // can be found in one of two positions. | |
237 | type table struct { | |
238 | h0 uint32 | |
239 | k uint | |
240 | mask uint32 | |
241 | tab []string | |
242 | } | |
243 | ||
244 | // hash returns the two hashes for s. | |
245 | func (t *table) hash(s string) (h1, h2 uint32) { | |
246 | h := fnv(t.h0, s) | |
247 | h1 = h & t.mask | |
248 | h2 = (h >> 16) & t.mask | |
249 | return | |
250 | } | |
251 | ||
252 | // init initializes the table with the given parameters. | |
253 | // h0 is the initial hash value, | |
254 | // k is the number of bits of hash value to use, and | |
255 | // x is the list of strings to store in the table. | |
256 | // init returns false if the table cannot be constructed. | |
257 | func (t *table) init(h0 uint32, k uint, x []string) bool { | |
258 | t.h0 = h0 | |
259 | t.k = k | |
260 | t.tab = make([]string, 1<<k) | |
261 | t.mask = 1<<k - 1 | |
262 | for _, s := range x { | |
263 | if !t.insert(s) { | |
264 | return false | |
265 | } | |
266 | } | |
267 | return true | |
268 | } | |
269 | ||
270 | // insert inserts s in the table. | |
271 | func (t *table) insert(s string) bool { | |
272 | h1, h2 := t.hash(s) | |
273 | if t.tab[h1] == "" { | |
274 | t.tab[h1] = s | |
275 | return true | |
276 | } | |
277 | if t.tab[h2] == "" { | |
278 | t.tab[h2] = s | |
279 | return true | |
280 | } | |
281 | if t.push(h1, 0) { | |
282 | t.tab[h1] = s | |
283 | return true | |
284 | } | |
285 | if t.push(h2, 0) { | |
286 | t.tab[h2] = s | |
287 | return true | |
288 | } | |
289 | return false | |
290 | } | |
291 | ||
292 | // push attempts to push aside the entry in slot i. | |
293 | func (t *table) push(i uint32, depth int) bool { | |
294 | if depth > len(t.tab) { | |
295 | return false | |
296 | } | |
297 | s := t.tab[i] | |
298 | h1, h2 := t.hash(s) | |
299 | j := h1 + h2 - i | |
300 | if t.tab[j] != "" && !t.push(j, depth+1) { | |
301 | return false | |
302 | } | |
303 | t.tab[j] = s | |
304 | return true | |
305 | } | |
306 | ||
307 | // The lists of element names and attribute keys were taken from | |
308 | // https://html.spec.whatwg.org/multipage/indices.html#index | |
15c0b25d | 309 | // as of the "HTML Living Standard - Last Updated 18 September 2017" version. |
07971ca3 | 310 | |
15c0b25d AP |
311 | // "command", "keygen" and "menuitem" have been removed from the spec, |
312 | // but are kept here for backwards compatibility. | |
07971ca3 AP |
313 | var elements = []string{ |
314 | "a", | |
315 | "abbr", | |
316 | "address", | |
317 | "area", | |
318 | "article", | |
319 | "aside", | |
320 | "audio", | |
321 | "b", | |
322 | "base", | |
323 | "bdi", | |
324 | "bdo", | |
325 | "blockquote", | |
326 | "body", | |
327 | "br", | |
328 | "button", | |
329 | "canvas", | |
330 | "caption", | |
331 | "cite", | |
332 | "code", | |
333 | "col", | |
334 | "colgroup", | |
335 | "command", | |
336 | "data", | |
337 | "datalist", | |
338 | "dd", | |
339 | "del", | |
340 | "details", | |
341 | "dfn", | |
342 | "dialog", | |
343 | "div", | |
344 | "dl", | |
345 | "dt", | |
346 | "em", | |
347 | "embed", | |
348 | "fieldset", | |
349 | "figcaption", | |
350 | "figure", | |
351 | "footer", | |
352 | "form", | |
353 | "h1", | |
354 | "h2", | |
355 | "h3", | |
356 | "h4", | |
357 | "h5", | |
358 | "h6", | |
359 | "head", | |
360 | "header", | |
361 | "hgroup", | |
362 | "hr", | |
363 | "html", | |
364 | "i", | |
365 | "iframe", | |
366 | "img", | |
367 | "input", | |
368 | "ins", | |
369 | "kbd", | |
370 | "keygen", | |
371 | "label", | |
372 | "legend", | |
373 | "li", | |
374 | "link", | |
15c0b25d | 375 | "main", |
07971ca3 AP |
376 | "map", |
377 | "mark", | |
378 | "menu", | |
379 | "menuitem", | |
380 | "meta", | |
381 | "meter", | |
382 | "nav", | |
383 | "noscript", | |
384 | "object", | |
385 | "ol", | |
386 | "optgroup", | |
387 | "option", | |
388 | "output", | |
389 | "p", | |
390 | "param", | |
15c0b25d | 391 | "picture", |
07971ca3 AP |
392 | "pre", |
393 | "progress", | |
394 | "q", | |
395 | "rp", | |
396 | "rt", | |
397 | "ruby", | |
398 | "s", | |
399 | "samp", | |
400 | "script", | |
401 | "section", | |
402 | "select", | |
15c0b25d | 403 | "slot", |
07971ca3 AP |
404 | "small", |
405 | "source", | |
406 | "span", | |
407 | "strong", | |
408 | "style", | |
409 | "sub", | |
410 | "summary", | |
411 | "sup", | |
412 | "table", | |
413 | "tbody", | |
414 | "td", | |
415 | "template", | |
416 | "textarea", | |
417 | "tfoot", | |
418 | "th", | |
419 | "thead", | |
420 | "time", | |
421 | "title", | |
422 | "tr", | |
423 | "track", | |
424 | "u", | |
425 | "ul", | |
426 | "var", | |
427 | "video", | |
428 | "wbr", | |
429 | } | |
430 | ||
431 | // https://html.spec.whatwg.org/multipage/indices.html#attributes-3 | |
15c0b25d AP |
432 | // |
433 | // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", | |
434 | // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, | |
435 | // but are kept here for backwards compatibility. | |
07971ca3 AP |
436 | var attributes = []string{ |
437 | "abbr", | |
438 | "accept", | |
439 | "accept-charset", | |
440 | "accesskey", | |
441 | "action", | |
15c0b25d AP |
442 | "allowfullscreen", |
443 | "allowpaymentrequest", | |
444 | "allowusermedia", | |
07971ca3 | 445 | "alt", |
15c0b25d | 446 | "as", |
07971ca3 AP |
447 | "async", |
448 | "autocomplete", | |
449 | "autofocus", | |
450 | "autoplay", | |
451 | "challenge", | |
452 | "charset", | |
453 | "checked", | |
454 | "cite", | |
455 | "class", | |
15c0b25d | 456 | "color", |
07971ca3 AP |
457 | "cols", |
458 | "colspan", | |
459 | "command", | |
460 | "content", | |
461 | "contenteditable", | |
462 | "contextmenu", | |
463 | "controls", | |
464 | "coords", | |
465 | "crossorigin", | |
466 | "data", | |
467 | "datetime", | |
468 | "default", | |
469 | "defer", | |
470 | "dir", | |
471 | "dirname", | |
472 | "disabled", | |
473 | "download", | |
474 | "draggable", | |
475 | "dropzone", | |
476 | "enctype", | |
477 | "for", | |
478 | "form", | |
479 | "formaction", | |
480 | "formenctype", | |
481 | "formmethod", | |
482 | "formnovalidate", | |
483 | "formtarget", | |
484 | "headers", | |
485 | "height", | |
486 | "hidden", | |
487 | "high", | |
488 | "href", | |
489 | "hreflang", | |
490 | "http-equiv", | |
491 | "icon", | |
492 | "id", | |
493 | "inputmode", | |
15c0b25d AP |
494 | "integrity", |
495 | "is", | |
07971ca3 AP |
496 | "ismap", |
497 | "itemid", | |
498 | "itemprop", | |
499 | "itemref", | |
500 | "itemscope", | |
501 | "itemtype", | |
502 | "keytype", | |
503 | "kind", | |
504 | "label", | |
505 | "lang", | |
506 | "list", | |
507 | "loop", | |
508 | "low", | |
509 | "manifest", | |
510 | "max", | |
511 | "maxlength", | |
512 | "media", | |
513 | "mediagroup", | |
514 | "method", | |
515 | "min", | |
516 | "minlength", | |
517 | "multiple", | |
518 | "muted", | |
519 | "name", | |
15c0b25d AP |
520 | "nomodule", |
521 | "nonce", | |
07971ca3 AP |
522 | "novalidate", |
523 | "open", | |
524 | "optimum", | |
525 | "pattern", | |
526 | "ping", | |
527 | "placeholder", | |
15c0b25d | 528 | "playsinline", |
07971ca3 AP |
529 | "poster", |
530 | "preload", | |
531 | "radiogroup", | |
532 | "readonly", | |
15c0b25d | 533 | "referrerpolicy", |
07971ca3 AP |
534 | "rel", |
535 | "required", | |
536 | "reversed", | |
537 | "rows", | |
538 | "rowspan", | |
539 | "sandbox", | |
540 | "spellcheck", | |
541 | "scope", | |
542 | "scoped", | |
543 | "seamless", | |
544 | "selected", | |
545 | "shape", | |
546 | "size", | |
547 | "sizes", | |
548 | "sortable", | |
549 | "sorted", | |
15c0b25d | 550 | "slot", |
07971ca3 | 551 | "span", |
15c0b25d | 552 | "spellcheck", |
07971ca3 AP |
553 | "src", |
554 | "srcdoc", | |
555 | "srclang", | |
15c0b25d | 556 | "srcset", |
07971ca3 AP |
557 | "start", |
558 | "step", | |
559 | "style", | |
560 | "tabindex", | |
561 | "target", | |
562 | "title", | |
563 | "translate", | |
564 | "type", | |
565 | "typemustmatch", | |
15c0b25d | 566 | "updateviacache", |
07971ca3 AP |
567 | "usemap", |
568 | "value", | |
569 | "width", | |
15c0b25d | 570 | "workertype", |
07971ca3 AP |
571 | "wrap", |
572 | } | |
573 | ||
15c0b25d AP |
574 | // "onautocomplete", "onautocompleteerror", "onmousewheel", |
575 | // "onshow" and "onsort" have been removed from the spec, | |
576 | // but are kept here for backwards compatibility. | |
07971ca3 AP |
577 | var eventHandlers = []string{ |
578 | "onabort", | |
579 | "onautocomplete", | |
580 | "onautocompleteerror", | |
15c0b25d | 581 | "onauxclick", |
07971ca3 AP |
582 | "onafterprint", |
583 | "onbeforeprint", | |
584 | "onbeforeunload", | |
585 | "onblur", | |
586 | "oncancel", | |
587 | "oncanplay", | |
588 | "oncanplaythrough", | |
589 | "onchange", | |
590 | "onclick", | |
591 | "onclose", | |
592 | "oncontextmenu", | |
15c0b25d | 593 | "oncopy", |
07971ca3 | 594 | "oncuechange", |
15c0b25d | 595 | "oncut", |
07971ca3 AP |
596 | "ondblclick", |
597 | "ondrag", | |
598 | "ondragend", | |
599 | "ondragenter", | |
15c0b25d | 600 | "ondragexit", |
07971ca3 AP |
601 | "ondragleave", |
602 | "ondragover", | |
603 | "ondragstart", | |
604 | "ondrop", | |
605 | "ondurationchange", | |
606 | "onemptied", | |
607 | "onended", | |
608 | "onerror", | |
609 | "onfocus", | |
610 | "onhashchange", | |
611 | "oninput", | |
612 | "oninvalid", | |
613 | "onkeydown", | |
614 | "onkeypress", | |
615 | "onkeyup", | |
616 | "onlanguagechange", | |
617 | "onload", | |
618 | "onloadeddata", | |
619 | "onloadedmetadata", | |
15c0b25d | 620 | "onloadend", |
07971ca3 AP |
621 | "onloadstart", |
622 | "onmessage", | |
15c0b25d | 623 | "onmessageerror", |
07971ca3 | 624 | "onmousedown", |
15c0b25d AP |
625 | "onmouseenter", |
626 | "onmouseleave", | |
07971ca3 AP |
627 | "onmousemove", |
628 | "onmouseout", | |
629 | "onmouseover", | |
630 | "onmouseup", | |
631 | "onmousewheel", | |
15c0b25d | 632 | "onwheel", |
07971ca3 AP |
633 | "onoffline", |
634 | "ononline", | |
635 | "onpagehide", | |
636 | "onpageshow", | |
15c0b25d | 637 | "onpaste", |
07971ca3 AP |
638 | "onpause", |
639 | "onplay", | |
640 | "onplaying", | |
641 | "onpopstate", | |
642 | "onprogress", | |
643 | "onratechange", | |
644 | "onreset", | |
645 | "onresize", | |
15c0b25d | 646 | "onrejectionhandled", |
07971ca3 | 647 | "onscroll", |
15c0b25d | 648 | "onsecuritypolicyviolation", |
07971ca3 AP |
649 | "onseeked", |
650 | "onseeking", | |
651 | "onselect", | |
652 | "onshow", | |
653 | "onsort", | |
654 | "onstalled", | |
655 | "onstorage", | |
656 | "onsubmit", | |
657 | "onsuspend", | |
658 | "ontimeupdate", | |
659 | "ontoggle", | |
15c0b25d | 660 | "onunhandledrejection", |
07971ca3 AP |
661 | "onunload", |
662 | "onvolumechange", | |
663 | "onwaiting", | |
664 | } | |
665 | ||
666 | // extra are ad-hoc values not covered by any of the lists above. | |
667 | var extra = []string{ | |
668 | "align", | |
669 | "annotation", | |
670 | "annotation-xml", | |
671 | "applet", | |
672 | "basefont", | |
673 | "bgsound", | |
674 | "big", | |
675 | "blink", | |
676 | "center", | |
677 | "color", | |
678 | "desc", | |
679 | "face", | |
680 | "font", | |
681 | "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. | |
682 | "foreignobject", | |
683 | "frame", | |
684 | "frameset", | |
685 | "image", | |
686 | "isindex", | |
687 | "listing", | |
688 | "malignmark", | |
689 | "marquee", | |
690 | "math", | |
691 | "mglyph", | |
692 | "mi", | |
693 | "mn", | |
694 | "mo", | |
695 | "ms", | |
696 | "mtext", | |
697 | "nobr", | |
698 | "noembed", | |
699 | "noframes", | |
700 | "plaintext", | |
701 | "prompt", | |
702 | "public", | |
703 | "spacer", | |
704 | "strike", | |
705 | "svg", | |
706 | "system", | |
707 | "tt", | |
708 | "xmp", | |
709 | } |