1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 //go:generate go run gen.go
8 //go:generate go run gen.go -test
24 // identifier converts s to a Go exported identifier.
25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
26 func identifier(s string) string {
27 b := make([]byte, 0, len(s))
34 if cap && 'a' <= c && c <= 'z' {
38 b = append(b, byte(c))
43 var test = flag.Bool("test", false, "generate table_test.go")
45 func genFile(name string, buf *bytes.Buffer) {
46 b, err := format.Source(buf.Bytes())
48 fmt.Fprintln(os.Stderr, err)
51 if err := ioutil.WriteFile(name, b, 0644); err != nil {
52 fmt.Fprintln(os.Stderr, err)
61 all = append(all, elements...)
62 all = append(all, attributes...)
63 all = append(all, eventHandlers...)
64 all = append(all, extra...)
67 // uniq - lists have dups
69 for _, s := range all {
70 if w == 0 || all[w-1] != s {
79 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
80 fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
81 fmt.Fprintln(&buf, "package atom\n")
82 fmt.Fprintln(&buf, "var testAtomList = []string{")
83 for _, s := range all {
84 fmt.Fprintf(&buf, "\t%q,\n", s)
86 fmt.Fprintln(&buf, "}")
88 genFile("table_test.go", &buf)
92 // Find hash that minimizes table size.
94 for i := 0; i < 1000000; i++ {
95 if best != nil && 1<<(best.k-1) < len(all) {
99 for k := uint(0); k <= 16; k++ {
100 if best != nil && k >= best.k {
104 if t.init(h, k, all) {
111 fmt.Fprintf(os.Stderr, "failed to construct string table\n")
115 // Lay out strings, using overlaps when possible.
116 layout := append([]string{}, all...)
118 // Remove strings that are substrings of other strings
119 for changed := true; changed; {
121 for i, s := range layout {
125 for j, t := range layout {
126 if i != j && t != "" && strings.Contains(s, t) {
134 // Join strings where one suffix matches another prefix.
136 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
137 // maximizing overlap length k.
141 for i, s := range layout {
145 for j, t := range layout {
149 for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
150 if s[len(s)-k:] == t[:k] {
159 layout[besti] += layout[bestj][bestk:]
166 text := strings.Join(layout, "")
168 atom := map[string]uint32{}
169 for _, s := range all {
170 off := strings.Index(text, s)
172 panic("lost string " + s)
174 atom[s] = uint32(off<<8 | len(s))
178 // Generate the Go code.
179 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
180 fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
181 fmt.Fprintln(&buf, "package atom\n\nconst (")
185 for _, s := range all {
189 fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
191 fmt.Fprintln(&buf, ")\n")
193 fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
194 fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
196 fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
197 for i, s := range best.tab {
201 fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
203 fmt.Fprintf(&buf, "}\n")
204 datasize := (1 << best.k) * 4
206 fmt.Fprintln(&buf, "const atomText =")
207 textsize := len(text)
209 fmt.Fprintf(&buf, "\t%q +\n", text[:60])
212 fmt.Fprintf(&buf, "\t%q\n\n", text)
214 genFile("table.go", &buf)
216 fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
221 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
222 func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
223 func (x byLen) Len() int { return len(x) }
225 // fnv computes the FNV hash with an arbitrary starting value h.
226 func fnv(h uint32, s string) uint32 {
227 for i := 0; i < len(s); i++ {
234 // A table represents an attempt at constructing the lookup table.
235 // The lookup table uses cuckoo hashing, meaning that each string
236 // can be found in one of two positions.
244 // hash returns the two hashes for s.
245 func (t *table) hash(s string) (h1, h2 uint32) {
248 h2 = (h >> 16) & t.mask
252 // init initializes the table with the given parameters.
253 // h0 is the initial hash value,
254 // k is the number of bits of hash value to use, and
255 // x is the list of strings to store in the table.
256 // init returns false if the table cannot be constructed.
257 func (t *table) init(h0 uint32, k uint, x []string) bool {
260 t.tab = make([]string, 1<<k)
262 for _, s := range x {
270 // insert inserts s in the table.
271 func (t *table) insert(s string) bool {
292 // push attempts to push aside the entry in slot i.
293 func (t *table) push(i uint32, depth int) bool {
294 if depth > len(t.tab) {
300 if t.tab[j] != "" && !t.push(j, depth+1) {
307 // The lists of element names and attribute keys were taken from
308 // https://html.spec.whatwg.org/multipage/indices.html#index
309 // as of the "HTML Living Standard - Last Updated 18 September 2017" version.
311 // "command", "keygen" and "menuitem" have been removed from the spec,
312 // but are kept here for backwards compatibility.
313 var elements = []string{
431 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
433 // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
434 // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
435 // but are kept here for backwards compatibility.
436 var attributes = []string{
443 "allowpaymentrequest",
574 // "onautocomplete", "onautocompleteerror", "onmousewheel",
575 // "onshow" and "onsort" have been removed from the spec,
576 // but are kept here for backwards compatibility.
577 var eventHandlers = []string{
580 "onautocompleteerror",
646 "onrejectionhandled",
648 "onsecuritypolicyviolation",
660 "onunhandledrejection",
666 // extra are ad-hoc values not covered by any of the lists above.
667 var extra = []string{
681 "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.