aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/golang.org/x/net/html
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/html')
-rw-r--r--vendor/golang.org/x/net/html/atom/atom.go78
-rw-r--r--vendor/golang.org/x/net/html/atom/table.go713
-rw-r--r--vendor/golang.org/x/net/html/const.go102
-rw-r--r--vendor/golang.org/x/net/html/doc.go106
-rw-r--r--vendor/golang.org/x/net/html/doctype.go156
-rw-r--r--vendor/golang.org/x/net/html/entity.go2253
-rw-r--r--vendor/golang.org/x/net/html/escape.go258
-rw-r--r--vendor/golang.org/x/net/html/foreign.go226
-rw-r--r--vendor/golang.org/x/net/html/node.go193
-rw-r--r--vendor/golang.org/x/net/html/parse.go2094
-rw-r--r--vendor/golang.org/x/net/html/render.go271
-rw-r--r--vendor/golang.org/x/net/html/token.go1219
12 files changed, 7669 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/html/atom/atom.go b/vendor/golang.org/x/net/html/atom/atom.go
new file mode 100644
index 0000000..cd0a8ac
--- /dev/null
+++ b/vendor/golang.org/x/net/html/atom/atom.go
@@ -0,0 +1,78 @@
1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package atom provides integer codes (also known as atoms) for a fixed set of
6// frequently occurring HTML strings: tag names and attribute keys such as "p"
7// and "id".
8//
9// Sharing an atom's name between all elements with the same tag can result in
10// fewer string allocations when tokenizing and parsing HTML. Integer
11// comparisons are also generally faster than string comparisons.
12//
13// The value of an atom's particular code is not guaranteed to stay the same
14// between versions of this package. Neither is any ordering guaranteed:
15// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
16// be dense. The only guarantees are that e.g. looking up "div" will yield
17// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
18package atom // import "golang.org/x/net/html/atom"
19
20// Atom is an integer code for a string. The zero value maps to "".
21type Atom uint32
22
23// String returns the atom's name.
24func (a Atom) String() string {
25 start := uint32(a >> 8)
26 n := uint32(a & 0xff)
27 if start+n > uint32(len(atomText)) {
28 return ""
29 }
30 return atomText[start : start+n]
31}
32
33func (a Atom) string() string {
34 return atomText[a>>8 : a>>8+a&0xff]
35}
36
// fnv folds the bytes of s into h, one byte at a time: the byte is xor-ed
// into h and the result is multiplied by the 32-bit FNV prime. The starting
// value h is chosen by the caller and may be arbitrary.
func fnv(h uint32, s []byte) uint32 {
	const prime32 = 16777619
	for _, b := range s {
		h = (h ^ uint32(b)) * prime32
	}
	return h
}
45
// match reports whether the string s and the byte slice t hold exactly the
// same bytes.
//
// Inputs of different lengths never match, so the function is safe to call
// without checking the lengths first: a longer t no longer indexes past the
// end of s, and a t that is only a prefix of s is not reported as a match.
// The string(t) conversion is used only as a comparison operand, a form the
// Go compiler handles without allocating.
func match(s string, t []byte) bool {
	return s == string(t)
}
54
55// Lookup returns the atom whose name is s. It returns zero if there is no
56// such atom. The lookup is case sensitive.
57func Lookup(s []byte) Atom {
58 if len(s) == 0 || len(s) > maxAtomLen {
59 return 0
60 }
61 h := fnv(hash0, s)
62 if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
63 return a
64 }
65 if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
66 return a
67 }
68 return 0
69}
70
71// String returns a string whose contents are equal to s. In that sense, it is
72// equivalent to string(s) but may be more efficient.
73func String(s []byte) string {
74 if a := Lookup(s); a != 0 {
75 return a.String()
76 }
77 return string(s)
78}
diff --git a/vendor/golang.org/x/net/html/atom/table.go b/vendor/golang.org/x/net/html/atom/table.go
new file mode 100644
index 0000000..2605ba3
--- /dev/null
+++ b/vendor/golang.org/x/net/html/atom/table.go
@@ -0,0 +1,713 @@
1// generated by go run gen.go; DO NOT EDIT
2
3package atom
4
5const (
6 A Atom = 0x1
7 Abbr Atom = 0x4
8 Accept Atom = 0x2106
9 AcceptCharset Atom = 0x210e
10 Accesskey Atom = 0x3309
11 Action Atom = 0x1f606
12 Address Atom = 0x4f307
13 Align Atom = 0x1105
14 Alt Atom = 0x4503
15 Annotation Atom = 0x1670a
16 AnnotationXml Atom = 0x1670e
17 Applet Atom = 0x2b306
18 Area Atom = 0x2fa04
19 Article Atom = 0x38807
20 Aside Atom = 0x8305
21 Async Atom = 0x7b05
22 Audio Atom = 0xa605
23 Autocomplete Atom = 0x1fc0c
24 Autofocus Atom = 0xb309
25 Autoplay Atom = 0xce08
26 B Atom = 0x101
27 Base Atom = 0xd604
28 Basefont Atom = 0xd608
29 Bdi Atom = 0x1a03
30 Bdo Atom = 0xe703
31 Bgsound Atom = 0x11807
32 Big Atom = 0x12403
33 Blink Atom = 0x12705
34 Blockquote Atom = 0x12c0a
35 Body Atom = 0x2f04
36 Br Atom = 0x202
37 Button Atom = 0x13606
38 Canvas Atom = 0x7f06
39 Caption Atom = 0x1bb07
40 Center Atom = 0x5b506
41 Challenge Atom = 0x21f09
42 Charset Atom = 0x2807
43 Checked Atom = 0x32807
44 Cite Atom = 0x3c804
45 Class Atom = 0x4de05
46 Code Atom = 0x14904
47 Col Atom = 0x15003
48 Colgroup Atom = 0x15008
49 Color Atom = 0x15d05
50 Cols Atom = 0x16204
51 Colspan Atom = 0x16207
52 Command Atom = 0x17507
53 Content Atom = 0x42307
54 Contenteditable Atom = 0x4230f
55 Contextmenu Atom = 0x3310b
56 Controls Atom = 0x18808
57 Coords Atom = 0x19406
58 Crossorigin Atom = 0x19f0b
59 Data Atom = 0x44a04
60 Datalist Atom = 0x44a08
61 Datetime Atom = 0x23c08
62 Dd Atom = 0x26702
63 Default Atom = 0x8607
64 Defer Atom = 0x14b05
65 Del Atom = 0x3ef03
66 Desc Atom = 0x4db04
67 Details Atom = 0x4807
68 Dfn Atom = 0x6103
69 Dialog Atom = 0x1b06
70 Dir Atom = 0x6903
71 Dirname Atom = 0x6907
72 Disabled Atom = 0x10c08
73 Div Atom = 0x11303
74 Dl Atom = 0x11e02
75 Download Atom = 0x40008
76 Draggable Atom = 0x17b09
77 Dropzone Atom = 0x39108
78 Dt Atom = 0x50902
79 Em Atom = 0x6502
80 Embed Atom = 0x6505
81 Enctype Atom = 0x21107
82 Face Atom = 0x5b304
83 Fieldset Atom = 0x1b008
84 Figcaption Atom = 0x1b80a
85 Figure Atom = 0x1cc06
86 Font Atom = 0xda04
87 Footer Atom = 0x8d06
88 For Atom = 0x1d803
89 ForeignObject Atom = 0x1d80d
90 Foreignobject Atom = 0x1e50d
91 Form Atom = 0x1f204
92 Formaction Atom = 0x1f20a
93 Formenctype Atom = 0x20d0b
94 Formmethod Atom = 0x2280a
95 Formnovalidate Atom = 0x2320e
96 Formtarget Atom = 0x2470a
97 Frame Atom = 0x9a05
98 Frameset Atom = 0x9a08
99 H1 Atom = 0x26e02
100 H2 Atom = 0x29402
101 H3 Atom = 0x2a702
102 H4 Atom = 0x2e902
103 H5 Atom = 0x2f302
104 H6 Atom = 0x50b02
105 Head Atom = 0x2d504
106 Header Atom = 0x2d506
107 Headers Atom = 0x2d507
108 Height Atom = 0x25106
109 Hgroup Atom = 0x25906
110 Hidden Atom = 0x26506
111 High Atom = 0x26b04
112 Hr Atom = 0x27002
113 Href Atom = 0x27004
114 Hreflang Atom = 0x27008
115 Html Atom = 0x25504
116 HttpEquiv Atom = 0x2780a
117 I Atom = 0x601
118 Icon Atom = 0x42204
119 Id Atom = 0x8502
120 Iframe Atom = 0x29606
121 Image Atom = 0x29c05
122 Img Atom = 0x2a103
123 Input Atom = 0x3e805
124 Inputmode Atom = 0x3e809
125 Ins Atom = 0x1a803
126 Isindex Atom = 0x2a907
127 Ismap Atom = 0x2b005
128 Itemid Atom = 0x33c06
129 Itemprop Atom = 0x3c908
130 Itemref Atom = 0x5ad07
131 Itemscope Atom = 0x2b909
132 Itemtype Atom = 0x2c308
133 Kbd Atom = 0x1903
134 Keygen Atom = 0x3906
135 Keytype Atom = 0x53707
136 Kind Atom = 0x10904
137 Label Atom = 0xf005
138 Lang Atom = 0x27404
139 Legend Atom = 0x18206
140 Li Atom = 0x1202
141 Link Atom = 0x12804
142 List Atom = 0x44e04
143 Listing Atom = 0x44e07
144 Loop Atom = 0xf404
145 Low Atom = 0x11f03
146 Malignmark Atom = 0x100a
147 Manifest Atom = 0x5f108
148 Map Atom = 0x2b203
149 Mark Atom = 0x1604
150 Marquee Atom = 0x2cb07
151 Math Atom = 0x2d204
152 Max Atom = 0x2e103
153 Maxlength Atom = 0x2e109
154 Media Atom = 0x6e05
155 Mediagroup Atom = 0x6e0a
156 Menu Atom = 0x33804
157 Menuitem Atom = 0x33808
158 Meta Atom = 0x45d04
159 Meter Atom = 0x24205
160 Method Atom = 0x22c06
161 Mglyph Atom = 0x2a206
162 Mi Atom = 0x2eb02
163 Min Atom = 0x2eb03
164 Minlength Atom = 0x2eb09
165 Mn Atom = 0x23502
166 Mo Atom = 0x3ed02
167 Ms Atom = 0x2bc02
168 Mtext Atom = 0x2f505
169 Multiple Atom = 0x30308
170 Muted Atom = 0x30b05
171 Name Atom = 0x6c04
172 Nav Atom = 0x3e03
173 Nobr Atom = 0x5704
174 Noembed Atom = 0x6307
175 Noframes Atom = 0x9808
176 Noscript Atom = 0x3d208
177 Novalidate Atom = 0x2360a
178 Object Atom = 0x1ec06
179 Ol Atom = 0xc902
180 Onabort Atom = 0x13a07
181 Onafterprint Atom = 0x1c00c
182 Onautocomplete Atom = 0x1fa0e
183 Onautocompleteerror Atom = 0x1fa13
184 Onbeforeprint Atom = 0x6040d
185 Onbeforeunload Atom = 0x4e70e
186 Onblur Atom = 0xaa06
187 Oncancel Atom = 0xe908
188 Oncanplay Atom = 0x28509
189 Oncanplaythrough Atom = 0x28510
190 Onchange Atom = 0x3a708
191 Onclick Atom = 0x31007
192 Onclose Atom = 0x31707
193 Oncontextmenu Atom = 0x32f0d
194 Oncuechange Atom = 0x3420b
195 Ondblclick Atom = 0x34d0a
196 Ondrag Atom = 0x35706
197 Ondragend Atom = 0x35709
198 Ondragenter Atom = 0x3600b
199 Ondragleave Atom = 0x36b0b
200 Ondragover Atom = 0x3760a
201 Ondragstart Atom = 0x3800b
202 Ondrop Atom = 0x38f06
203 Ondurationchange Atom = 0x39f10
204 Onemptied Atom = 0x39609
205 Onended Atom = 0x3af07
206 Onerror Atom = 0x3b607
207 Onfocus Atom = 0x3bd07
208 Onhashchange Atom = 0x3da0c
209 Oninput Atom = 0x3e607
210 Oninvalid Atom = 0x3f209
211 Onkeydown Atom = 0x3fb09
212 Onkeypress Atom = 0x4080a
213 Onkeyup Atom = 0x41807
214 Onlanguagechange Atom = 0x43210
215 Onload Atom = 0x44206
216 Onloadeddata Atom = 0x4420c
217 Onloadedmetadata Atom = 0x45510
218 Onloadstart Atom = 0x46b0b
219 Onmessage Atom = 0x47609
220 Onmousedown Atom = 0x47f0b
221 Onmousemove Atom = 0x48a0b
222 Onmouseout Atom = 0x4950a
223 Onmouseover Atom = 0x4a20b
224 Onmouseup Atom = 0x4ad09
225 Onmousewheel Atom = 0x4b60c
226 Onoffline Atom = 0x4c209
227 Ononline Atom = 0x4cb08
228 Onpagehide Atom = 0x4d30a
229 Onpageshow Atom = 0x4fe0a
230 Onpause Atom = 0x50d07
231 Onplay Atom = 0x51706
232 Onplaying Atom = 0x51709
233 Onpopstate Atom = 0x5200a
234 Onprogress Atom = 0x52a0a
235 Onratechange Atom = 0x53e0c
236 Onreset Atom = 0x54a07
237 Onresize Atom = 0x55108
238 Onscroll Atom = 0x55f08
239 Onseeked Atom = 0x56708
240 Onseeking Atom = 0x56f09
241 Onselect Atom = 0x57808
242 Onshow Atom = 0x58206
243 Onsort Atom = 0x58b06
244 Onstalled Atom = 0x59509
245 Onstorage Atom = 0x59e09
246 Onsubmit Atom = 0x5a708
247 Onsuspend Atom = 0x5bb09
248 Ontimeupdate Atom = 0xdb0c
249 Ontoggle Atom = 0x5c408
250 Onunload Atom = 0x5cc08
251 Onvolumechange Atom = 0x5d40e
252 Onwaiting Atom = 0x5e209
253 Open Atom = 0x3cf04
254 Optgroup Atom = 0xf608
255 Optimum Atom = 0x5eb07
256 Option Atom = 0x60006
257 Output Atom = 0x49c06
258 P Atom = 0xc01
259 Param Atom = 0xc05
260 Pattern Atom = 0x5107
261 Ping Atom = 0x7704
262 Placeholder Atom = 0xc30b
263 Plaintext Atom = 0xfd09
264 Poster Atom = 0x15706
265 Pre Atom = 0x25e03
266 Preload Atom = 0x25e07
267 Progress Atom = 0x52c08
268 Prompt Atom = 0x5fa06
269 Public Atom = 0x41e06
270 Q Atom = 0x13101
271 Radiogroup Atom = 0x30a
272 Readonly Atom = 0x2fb08
273 Rel Atom = 0x25f03
274 Required Atom = 0x1d008
275 Reversed Atom = 0x5a08
276 Rows Atom = 0x9204
277 Rowspan Atom = 0x9207
278 Rp Atom = 0x1c602
279 Rt Atom = 0x13f02
280 Ruby Atom = 0xaf04
281 S Atom = 0x2c01
282 Samp Atom = 0x4e04
283 Sandbox Atom = 0xbb07
284 Scope Atom = 0x2bd05
285 Scoped Atom = 0x2bd06
286 Script Atom = 0x3d406
287 Seamless Atom = 0x31c08
288 Section Atom = 0x4e207
289 Select Atom = 0x57a06
290 Selected Atom = 0x57a08
291 Shape Atom = 0x4f905
292 Size Atom = 0x55504
293 Sizes Atom = 0x55505
294 Small Atom = 0x18f05
295 Sortable Atom = 0x58d08
296 Sorted Atom = 0x19906
297 Source Atom = 0x1aa06
298 Spacer Atom = 0x2db06
299 Span Atom = 0x9504
300 Spellcheck Atom = 0x3230a
301 Src Atom = 0x3c303
302 Srcdoc Atom = 0x3c306
303 Srclang Atom = 0x41107
304 Start Atom = 0x38605
305 Step Atom = 0x5f704
306 Strike Atom = 0x53306
307 Strong Atom = 0x55906
308 Style Atom = 0x61105
309 Sub Atom = 0x5a903
310 Summary Atom = 0x61607
311 Sup Atom = 0x61d03
312 Svg Atom = 0x62003
313 System Atom = 0x62306
314 Tabindex Atom = 0x46308
315 Table Atom = 0x42d05
316 Target Atom = 0x24b06
317 Tbody Atom = 0x2e05
318 Td Atom = 0x4702
319 Template Atom = 0x62608
320 Textarea Atom = 0x2f608
321 Tfoot Atom = 0x8c05
322 Th Atom = 0x22e02
323 Thead Atom = 0x2d405
324 Time Atom = 0xdd04
325 Title Atom = 0xa105
326 Tr Atom = 0x10502
327 Track Atom = 0x10505
328 Translate Atom = 0x14009
329 Tt Atom = 0x5302
330 Type Atom = 0x21404
331 Typemustmatch Atom = 0x2140d
332 U Atom = 0xb01
333 Ul Atom = 0x8a02
334 Usemap Atom = 0x51106
335 Value Atom = 0x4005
336 Var Atom = 0x11503
337 Video Atom = 0x28105
338 Wbr Atom = 0x12103
339 Width Atom = 0x50705
340 Wrap Atom = 0x58704
341 Xmp Atom = 0xc103
342)
343
344const hash0 = 0xc17da63e
345
346const maxAtomLen = 19
347
348var table = [1 << 9]Atom{
349 0x1: 0x48a0b, // onmousemove
350 0x2: 0x5e209, // onwaiting
351 0x3: 0x1fa13, // onautocompleteerror
352 0x4: 0x5fa06, // prompt
353 0x7: 0x5eb07, // optimum
354 0x8: 0x1604, // mark
355 0xa: 0x5ad07, // itemref
356 0xb: 0x4fe0a, // onpageshow
357 0xc: 0x57a06, // select
358 0xd: 0x17b09, // draggable
359 0xe: 0x3e03, // nav
360 0xf: 0x17507, // command
361 0x11: 0xb01, // u
362 0x14: 0x2d507, // headers
363 0x15: 0x44a08, // datalist
364 0x17: 0x4e04, // samp
365 0x1a: 0x3fb09, // onkeydown
366 0x1b: 0x55f08, // onscroll
367 0x1c: 0x15003, // col
368 0x20: 0x3c908, // itemprop
369 0x21: 0x2780a, // http-equiv
370 0x22: 0x61d03, // sup
371 0x24: 0x1d008, // required
372 0x2b: 0x25e07, // preload
373 0x2c: 0x6040d, // onbeforeprint
374 0x2d: 0x3600b, // ondragenter
375 0x2e: 0x50902, // dt
376 0x2f: 0x5a708, // onsubmit
377 0x30: 0x27002, // hr
378 0x31: 0x32f0d, // oncontextmenu
379 0x33: 0x29c05, // image
380 0x34: 0x50d07, // onpause
381 0x35: 0x25906, // hgroup
382 0x36: 0x7704, // ping
383 0x37: 0x57808, // onselect
384 0x3a: 0x11303, // div
385 0x3b: 0x1fa0e, // onautocomplete
386 0x40: 0x2eb02, // mi
387 0x41: 0x31c08, // seamless
388 0x42: 0x2807, // charset
389 0x43: 0x8502, // id
390 0x44: 0x5200a, // onpopstate
391 0x45: 0x3ef03, // del
392 0x46: 0x2cb07, // marquee
393 0x47: 0x3309, // accesskey
394 0x49: 0x8d06, // footer
395 0x4a: 0x44e04, // list
396 0x4b: 0x2b005, // ismap
397 0x51: 0x33804, // menu
398 0x52: 0x2f04, // body
399 0x55: 0x9a08, // frameset
400 0x56: 0x54a07, // onreset
401 0x57: 0x12705, // blink
402 0x58: 0xa105, // title
403 0x59: 0x38807, // article
404 0x5b: 0x22e02, // th
405 0x5d: 0x13101, // q
406 0x5e: 0x3cf04, // open
407 0x5f: 0x2fa04, // area
408 0x61: 0x44206, // onload
409 0x62: 0xda04, // font
410 0x63: 0xd604, // base
411 0x64: 0x16207, // colspan
412 0x65: 0x53707, // keytype
413 0x66: 0x11e02, // dl
414 0x68: 0x1b008, // fieldset
415 0x6a: 0x2eb03, // min
416 0x6b: 0x11503, // var
417 0x6f: 0x2d506, // header
418 0x70: 0x13f02, // rt
419 0x71: 0x15008, // colgroup
420 0x72: 0x23502, // mn
421 0x74: 0x13a07, // onabort
422 0x75: 0x3906, // keygen
423 0x76: 0x4c209, // onoffline
424 0x77: 0x21f09, // challenge
425 0x78: 0x2b203, // map
426 0x7a: 0x2e902, // h4
427 0x7b: 0x3b607, // onerror
428 0x7c: 0x2e109, // maxlength
429 0x7d: 0x2f505, // mtext
430 0x7e: 0xbb07, // sandbox
431 0x7f: 0x58b06, // onsort
432 0x80: 0x100a, // malignmark
433 0x81: 0x45d04, // meta
434 0x82: 0x7b05, // async
435 0x83: 0x2a702, // h3
436 0x84: 0x26702, // dd
437 0x85: 0x27004, // href
438 0x86: 0x6e0a, // mediagroup
439 0x87: 0x19406, // coords
440 0x88: 0x41107, // srclang
441 0x89: 0x34d0a, // ondblclick
442 0x8a: 0x4005, // value
443 0x8c: 0xe908, // oncancel
444 0x8e: 0x3230a, // spellcheck
445 0x8f: 0x9a05, // frame
446 0x91: 0x12403, // big
447 0x94: 0x1f606, // action
448 0x95: 0x6903, // dir
449 0x97: 0x2fb08, // readonly
450 0x99: 0x42d05, // table
451 0x9a: 0x61607, // summary
452 0x9b: 0x12103, // wbr
453 0x9c: 0x30a, // radiogroup
454 0x9d: 0x6c04, // name
455 0x9f: 0x62306, // system
456 0xa1: 0x15d05, // color
457 0xa2: 0x7f06, // canvas
458 0xa3: 0x25504, // html
459 0xa5: 0x56f09, // onseeking
460 0xac: 0x4f905, // shape
461 0xad: 0x25f03, // rel
462 0xae: 0x28510, // oncanplaythrough
463 0xaf: 0x3760a, // ondragover
464 0xb0: 0x62608, // template
465 0xb1: 0x1d80d, // foreignObject
466 0xb3: 0x9204, // rows
467 0xb6: 0x44e07, // listing
468 0xb7: 0x49c06, // output
469 0xb9: 0x3310b, // contextmenu
470 0xbb: 0x11f03, // low
471 0xbc: 0x1c602, // rp
472 0xbd: 0x5bb09, // onsuspend
473 0xbe: 0x13606, // button
474 0xbf: 0x4db04, // desc
475 0xc1: 0x4e207, // section
476 0xc2: 0x52a0a, // onprogress
477 0xc3: 0x59e09, // onstorage
478 0xc4: 0x2d204, // math
479 0xc5: 0x4503, // alt
480 0xc7: 0x8a02, // ul
481 0xc8: 0x5107, // pattern
482 0xc9: 0x4b60c, // onmousewheel
483 0xca: 0x35709, // ondragend
484 0xcb: 0xaf04, // ruby
485 0xcc: 0xc01, // p
486 0xcd: 0x31707, // onclose
487 0xce: 0x24205, // meter
488 0xcf: 0x11807, // bgsound
489 0xd2: 0x25106, // height
490 0xd4: 0x101, // b
491 0xd5: 0x2c308, // itemtype
492 0xd8: 0x1bb07, // caption
493 0xd9: 0x10c08, // disabled
494 0xdb: 0x33808, // menuitem
495 0xdc: 0x62003, // svg
496 0xdd: 0x18f05, // small
497 0xde: 0x44a04, // data
498 0xe0: 0x4cb08, // ononline
499 0xe1: 0x2a206, // mglyph
500 0xe3: 0x6505, // embed
501 0xe4: 0x10502, // tr
502 0xe5: 0x46b0b, // onloadstart
503 0xe7: 0x3c306, // srcdoc
504 0xeb: 0x5c408, // ontoggle
505 0xed: 0xe703, // bdo
506 0xee: 0x4702, // td
507 0xef: 0x8305, // aside
508 0xf0: 0x29402, // h2
509 0xf1: 0x52c08, // progress
510 0xf2: 0x12c0a, // blockquote
511 0xf4: 0xf005, // label
512 0xf5: 0x601, // i
513 0xf7: 0x9207, // rowspan
514 0xfb: 0x51709, // onplaying
515 0xfd: 0x2a103, // img
516 0xfe: 0xf608, // optgroup
517 0xff: 0x42307, // content
518 0x101: 0x53e0c, // onratechange
519 0x103: 0x3da0c, // onhashchange
520 0x104: 0x4807, // details
521 0x106: 0x40008, // download
522 0x109: 0x14009, // translate
523 0x10b: 0x4230f, // contenteditable
524 0x10d: 0x36b0b, // ondragleave
525 0x10e: 0x2106, // accept
526 0x10f: 0x57a08, // selected
527 0x112: 0x1f20a, // formaction
528 0x113: 0x5b506, // center
529 0x115: 0x45510, // onloadedmetadata
530 0x116: 0x12804, // link
531 0x117: 0xdd04, // time
532 0x118: 0x19f0b, // crossorigin
533 0x119: 0x3bd07, // onfocus
534 0x11a: 0x58704, // wrap
535 0x11b: 0x42204, // icon
536 0x11d: 0x28105, // video
537 0x11e: 0x4de05, // class
538 0x121: 0x5d40e, // onvolumechange
539 0x122: 0xaa06, // onblur
540 0x123: 0x2b909, // itemscope
541 0x124: 0x61105, // style
542 0x127: 0x41e06, // public
543 0x129: 0x2320e, // formnovalidate
544 0x12a: 0x58206, // onshow
545 0x12c: 0x51706, // onplay
546 0x12d: 0x3c804, // cite
547 0x12e: 0x2bc02, // ms
548 0x12f: 0xdb0c, // ontimeupdate
549 0x130: 0x10904, // kind
550 0x131: 0x2470a, // formtarget
551 0x135: 0x3af07, // onended
552 0x136: 0x26506, // hidden
553 0x137: 0x2c01, // s
554 0x139: 0x2280a, // formmethod
555 0x13a: 0x3e805, // input
556 0x13c: 0x50b02, // h6
557 0x13d: 0xc902, // ol
558 0x13e: 0x3420b, // oncuechange
559 0x13f: 0x1e50d, // foreignobject
560 0x143: 0x4e70e, // onbeforeunload
561 0x144: 0x2bd05, // scope
562 0x145: 0x39609, // onemptied
563 0x146: 0x14b05, // defer
564 0x147: 0xc103, // xmp
565 0x148: 0x39f10, // ondurationchange
566 0x149: 0x1903, // kbd
567 0x14c: 0x47609, // onmessage
568 0x14d: 0x60006, // option
569 0x14e: 0x2eb09, // minlength
570 0x14f: 0x32807, // checked
571 0x150: 0xce08, // autoplay
572 0x152: 0x202, // br
573 0x153: 0x2360a, // novalidate
574 0x156: 0x6307, // noembed
575 0x159: 0x31007, // onclick
576 0x15a: 0x47f0b, // onmousedown
577 0x15b: 0x3a708, // onchange
578 0x15e: 0x3f209, // oninvalid
579 0x15f: 0x2bd06, // scoped
580 0x160: 0x18808, // controls
581 0x161: 0x30b05, // muted
582 0x162: 0x58d08, // sortable
583 0x163: 0x51106, // usemap
584 0x164: 0x1b80a, // figcaption
585 0x165: 0x35706, // ondrag
586 0x166: 0x26b04, // high
587 0x168: 0x3c303, // src
588 0x169: 0x15706, // poster
589 0x16b: 0x1670e, // annotation-xml
590 0x16c: 0x5f704, // step
591 0x16d: 0x4, // abbr
592 0x16e: 0x1b06, // dialog
593 0x170: 0x1202, // li
594 0x172: 0x3ed02, // mo
595 0x175: 0x1d803, // for
596 0x176: 0x1a803, // ins
597 0x178: 0x55504, // size
598 0x179: 0x43210, // onlanguagechange
599 0x17a: 0x8607, // default
600 0x17b: 0x1a03, // bdi
601 0x17c: 0x4d30a, // onpagehide
602 0x17d: 0x6907, // dirname
603 0x17e: 0x21404, // type
604 0x17f: 0x1f204, // form
605 0x181: 0x28509, // oncanplay
606 0x182: 0x6103, // dfn
607 0x183: 0x46308, // tabindex
608 0x186: 0x6502, // em
609 0x187: 0x27404, // lang
610 0x189: 0x39108, // dropzone
611 0x18a: 0x4080a, // onkeypress
612 0x18b: 0x23c08, // datetime
613 0x18c: 0x16204, // cols
614 0x18d: 0x1, // a
615 0x18e: 0x4420c, // onloadeddata
616 0x190: 0xa605, // audio
617 0x192: 0x2e05, // tbody
618 0x193: 0x22c06, // method
619 0x195: 0xf404, // loop
620 0x196: 0x29606, // iframe
621 0x198: 0x2d504, // head
622 0x19e: 0x5f108, // manifest
623 0x19f: 0xb309, // autofocus
624 0x1a0: 0x14904, // code
625 0x1a1: 0x55906, // strong
626 0x1a2: 0x30308, // multiple
627 0x1a3: 0xc05, // param
628 0x1a6: 0x21107, // enctype
629 0x1a7: 0x5b304, // face
630 0x1a8: 0xfd09, // plaintext
631 0x1a9: 0x26e02, // h1
632 0x1aa: 0x59509, // onstalled
633 0x1ad: 0x3d406, // script
634 0x1ae: 0x2db06, // spacer
635 0x1af: 0x55108, // onresize
636 0x1b0: 0x4a20b, // onmouseover
637 0x1b1: 0x5cc08, // onunload
638 0x1b2: 0x56708, // onseeked
639 0x1b4: 0x2140d, // typemustmatch
640 0x1b5: 0x1cc06, // figure
641 0x1b6: 0x4950a, // onmouseout
642 0x1b7: 0x25e03, // pre
643 0x1b8: 0x50705, // width
644 0x1b9: 0x19906, // sorted
645 0x1bb: 0x5704, // nobr
646 0x1be: 0x5302, // tt
647 0x1bf: 0x1105, // align
648 0x1c0: 0x3e607, // oninput
649 0x1c3: 0x41807, // onkeyup
650 0x1c6: 0x1c00c, // onafterprint
651 0x1c7: 0x210e, // accept-charset
652 0x1c8: 0x33c06, // itemid
653 0x1c9: 0x3e809, // inputmode
654 0x1cb: 0x53306, // strike
655 0x1cc: 0x5a903, // sub
656 0x1cd: 0x10505, // track
657 0x1ce: 0x38605, // start
658 0x1d0: 0xd608, // basefont
659 0x1d6: 0x1aa06, // source
660 0x1d7: 0x18206, // legend
661 0x1d8: 0x2d405, // thead
662 0x1da: 0x8c05, // tfoot
663 0x1dd: 0x1ec06, // object
664 0x1de: 0x6e05, // media
665 0x1df: 0x1670a, // annotation
666 0x1e0: 0x20d0b, // formenctype
667 0x1e2: 0x3d208, // noscript
668 0x1e4: 0x55505, // sizes
669 0x1e5: 0x1fc0c, // autocomplete
670 0x1e6: 0x9504, // span
671 0x1e7: 0x9808, // noframes
672 0x1e8: 0x24b06, // target
673 0x1e9: 0x38f06, // ondrop
674 0x1ea: 0x2b306, // applet
675 0x1ec: 0x5a08, // reversed
676 0x1f0: 0x2a907, // isindex
677 0x1f3: 0x27008, // hreflang
678 0x1f5: 0x2f302, // h5
679 0x1f6: 0x4f307, // address
680 0x1fa: 0x2e103, // max
681 0x1fb: 0xc30b, // placeholder
682 0x1fc: 0x2f608, // textarea
683 0x1fe: 0x4ad09, // onmouseup
684 0x1ff: 0x3800b, // ondragstart
685}
686
687const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
688 "genavaluealtdetailsampatternobreversedfnoembedirnamediagroup" +
689 "ingasyncanvasidefaultfooterowspanoframesetitleaudionblurubya" +
690 "utofocusandboxmplaceholderautoplaybasefontimeupdatebdoncance" +
691 "labelooptgrouplaintextrackindisabledivarbgsoundlowbrbigblink" +
692 "blockquotebuttonabortranslatecodefercolgroupostercolorcolspa" +
693 "nnotation-xmlcommandraggablegendcontrolsmallcoordsortedcross" +
694 "originsourcefieldsetfigcaptionafterprintfigurequiredforeignO" +
695 "bjectforeignobjectformactionautocompleteerrorformenctypemust" +
696 "matchallengeformmethodformnovalidatetimeterformtargetheightm" +
697 "lhgroupreloadhiddenhigh1hreflanghttp-equivideoncanplaythroug" +
698 "h2iframeimageimglyph3isindexismappletitemscopeditemtypemarqu" +
699 "eematheaderspacermaxlength4minlength5mtextareadonlymultiplem" +
700 "utedonclickoncloseamlesspellcheckedoncontextmenuitemidoncuec" +
701 "hangeondblclickondragendondragenterondragleaveondragoverondr" +
702 "agstarticleondropzonemptiedondurationchangeonendedonerroronf" +
703 "ocusrcdocitempropenoscriptonhashchangeoninputmodeloninvalido" +
704 "nkeydownloadonkeypressrclangonkeyupublicontenteditableonlang" +
705 "uagechangeonloadeddatalistingonloadedmetadatabindexonloadsta" +
706 "rtonmessageonmousedownonmousemoveonmouseoutputonmouseoveronm" +
707 "ouseuponmousewheelonofflineononlineonpagehidesclassectionbef" +
708 "oreunloaddresshapeonpageshowidth6onpausemaponplayingonpopsta" +
709 "teonprogresstrikeytypeonratechangeonresetonresizestrongonscr" +
710 "ollonseekedonseekingonselectedonshowraponsortableonstalledon" +
711 "storageonsubmitemrefacenteronsuspendontoggleonunloadonvolume" +
712 "changeonwaitingoptimumanifestepromptoptionbeforeprintstylesu" +
713 "mmarysupsvgsystemplate"
diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go
new file mode 100644
index 0000000..52f651f
--- /dev/null
+++ b/vendor/golang.org/x/net/html/const.go
@@ -0,0 +1,102 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
//
// isSpecialElementMap holds the HTML-namespace members of that "special"
// category, keyed by lower-case tag name. It includes "main", which the
// specification lists alongside the other sectioning elements.
var isSpecialElementMap = map[string]bool{
	"address":    true,
	"applet":     true,
	"area":       true,
	"article":    true,
	"aside":      true,
	"base":       true,
	"basefont":   true,
	"bgsound":    true,
	"blockquote": true,
	"body":       true,
	"br":         true,
	"button":     true,
	"caption":    true,
	"center":     true,
	"col":        true,
	"colgroup":   true,
	"dd":         true,
	"details":    true,
	"dir":        true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"embed":      true,
	"fieldset":   true,
	"figcaption": true,
	"figure":     true,
	"footer":     true,
	"form":       true,
	"frame":      true,
	"frameset":   true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"header":     true,
	"hgroup":     true,
	"hr":         true,
	"html":       true,
	"iframe":     true,
	"img":        true,
	"input":      true,
	"isindex":    true,
	"li":         true,
	"link":       true,
	"listing":    true,
	"main":       true,
	"marquee":    true,
	"menu":       true,
	"meta":       true,
	"nav":        true,
	"noembed":    true,
	"noframes":   true,
	"noscript":   true,
	"object":     true,
	"ol":         true,
	"p":          true,
	"param":      true,
	"plaintext":  true,
	"pre":        true,
	"script":     true,
	"section":    true,
	"select":     true,
	"source":     true,
	"style":      true,
	"summary":    true,
	"table":      true,
	"tbody":      true,
	"td":         true,
	"template":   true,
	"textarea":   true,
	"tfoot":      true,
	"th":         true,
	"thead":      true,
	"title":      true,
	"tr":         true,
	"track":      true,
	"ul":         true,
	"wbr":        true,
	"xmp":        true,
}
93
94func isSpecialElement(element *Node) bool {
95 switch element.Namespace {
96 case "", "html":
97 return isSpecialElementMap[element.Data]
98 case "svg":
99 return element.Data == "foreignObject"
100 }
101 return false
102}
diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go
new file mode 100644
index 0000000..94f4968
--- /dev/null
+++ b/vendor/golang.org/x/net/html/doc.go
@@ -0,0 +1,106 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package html implements an HTML5-compliant tokenizer and parser.
7
8Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
9caller's responsibility to ensure that r provides UTF-8 encoded HTML.
10
11 z := html.NewTokenizer(r)
12
13Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
14which parses the next token and returns its type, or an error:
15
16 for {
17 tt := z.Next()
18 if tt == html.ErrorToken {
19 // ...
20 return ...
21 }
22 // Process the current token.
23 }
24
25There are two APIs for retrieving the current token. The high-level API is to
26call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
27allow optionally calling Raw after Next but before Token, Text, TagName, or
28TagAttr. In EBNF notation, the valid call sequence per token is:
29
30 Next {Raw} [ Token | Text | TagName {TagAttr} ]
31
32Token returns an independent data structure that completely describes a token.
33Entities (such as "&lt;") are unescaped, tag names and attribute keys are
34lower-cased, and attributes are collected into a []Attribute. For example:
35
36 for {
37 if z.Next() == html.ErrorToken {
38 // Returning io.EOF indicates success.
39 return z.Err()
40 }
41 emitToken(z.Token())
42 }
43
44The low-level API performs fewer allocations and copies, but the contents of
45the []byte values returned by Text, TagName and TagAttr may change on the next
46call to Next. For example, to extract an HTML page's anchor text:
47
48 depth := 0
49 for {
50 tt := z.Next()
51 switch tt {
52 case ErrorToken:
53 return z.Err()
54 case TextToken:
55 if depth > 0 {
56 // emitBytes should copy the []byte it receives,
57 // if it doesn't process it immediately.
58 emitBytes(z.Text())
59 }
60 case StartTagToken, EndTagToken:
61 tn, _ := z.TagName()
62 if len(tn) == 1 && tn[0] == 'a' {
63 if tt == StartTagToken {
64 depth++
65 } else {
66 depth--
67 }
68 }
69 }
70 }
71
72Parsing is done by calling Parse with an io.Reader, which returns the root of
73the parse tree (the document element) as a *Node. It is the caller's
74responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
75example, to process each anchor node in depth-first order:
76
77 doc, err := html.Parse(r)
78 if err != nil {
79 // ...
80 }
81 var f func(*html.Node)
82 f = func(n *html.Node) {
83 if n.Type == html.ElementNode && n.Data == "a" {
84 // Do something with n...
85 }
86 for c := n.FirstChild; c != nil; c = c.NextSibling {
87 f(c)
88 }
89 }
90 f(doc)
91
92The relevant specifications include:
93https://html.spec.whatwg.org/multipage/syntax.html and
94https://html.spec.whatwg.org/multipage/syntax.html#tokenization
95*/
96package html // import "golang.org/x/net/html"
97
98// The tokenization algorithm implemented by this package is not a line-by-line
99// transliteration of the relatively verbose state-machine in the WHATWG
100// specification. A more direct approach is used instead, where the program
101// counter implies the state, such as whether it is tokenizing a tag or a text
102// node. Specification compliance is verified by checking expected and actual
103// outputs over a test suite rather than aiming for algorithmic fidelity.
104
105// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
106// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/vendor/golang.org/x/net/html/doctype.go b/vendor/golang.org/x/net/html/doctype.go
new file mode 100644
index 0000000..c484e5a
--- /dev/null
+++ b/vendor/golang.org/x/net/html/doctype.go
@@ -0,0 +1,156 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "strings"
9)
10
11// parseDoctype parses the data from a DoctypeToken into a name,
12// public identifier, and system identifier. It returns a Node whose Type
13// is DoctypeNode, whose Data is the name, and which has attributes
14// named "system" and "public" for the two identifiers if they were present.
15// quirks is whether the document should be parsed in "quirks mode".
16func parseDoctype(s string) (n *Node, quirks bool) {
17 n = &Node{Type: DoctypeNode}
18
19 // Find the name.
20 space := strings.IndexAny(s, whitespace)
21 if space == -1 {
22 space = len(s)
23 }
24 n.Data = s[:space]
25 // The comparison to "html" is case-sensitive.
26 if n.Data != "html" {
27 quirks = true
28 }
29 n.Data = strings.ToLower(n.Data)
30 s = strings.TrimLeft(s[space:], whitespace)
31
32 if len(s) < 6 {
33 // It can't start with "PUBLIC" or "SYSTEM".
34 // Ignore the rest of the string.
35 return n, quirks || s != ""
36 }
37
38 key := strings.ToLower(s[:6])
39 s = s[6:]
40 for key == "public" || key == "system" {
41 s = strings.TrimLeft(s, whitespace)
42 if s == "" {
43 break
44 }
45 quote := s[0]
46 if quote != '"' && quote != '\'' {
47 break
48 }
49 s = s[1:]
50 q := strings.IndexRune(s, rune(quote))
51 var id string
52 if q == -1 {
53 id = s
54 s = ""
55 } else {
56 id = s[:q]
57 s = s[q+1:]
58 }
59 n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
60 if key == "public" {
61 key = "system"
62 } else {
63 key = ""
64 }
65 }
66
67 if key != "" || s != "" {
68 quirks = true
69 } else if len(n.Attr) > 0 {
70 if n.Attr[0].Key == "public" {
71 public := strings.ToLower(n.Attr[0].Val)
72 switch public {
73 case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
74 quirks = true
75 default:
76 for _, q := range quirkyIDs {
77 if strings.HasPrefix(public, q) {
78 quirks = true
79 break
80 }
81 }
82 }
83 // The following two public IDs only cause quirks mode if there is no system ID.
84 if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
85 strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
86 quirks = true
87 }
88 }
89 if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
90 strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
91 quirks = true
92 }
93 }
94
95 return n, quirks
96}
97
// quirkyIDs holds the public doctype identifier prefixes that put a document
// into quirks mode. Entries are matched as prefixes against a lower-cased
// public identifier, so every entry must itself be lower case.
var quirkyIDs = []string{
	// Silmaril, AdvaSoft and AS.
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",

	// IETF.
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",

	// Metrius, Microsoft, Netscape and O'Reilly.
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",

	// SoftQuad, Spyglass, SQ and Sun.
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",

	// W3C, W3O and WebTechs.
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}
diff --git a/vendor/golang.org/x/net/html/entity.go b/vendor/golang.org/x/net/html/entity.go
new file mode 100644
index 0000000..a50c04c
--- /dev/null
+++ b/vendor/golang.org/x/net/html/entity.go
@@ -0,0 +1,2253 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
// longestEntityWithoutSemicolon is the maximum length, in bytes, of any
// entity name that does not end with ';': all such names are 6 or fewer
// bytes long.
const longestEntityWithoutSemicolon = 6
9
10// entity is a map from HTML entity names to their values. The semicolon matters:
11// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
12// lists both "amp" and "amp;" as two separate entries.
13//
14// Note that the HTML5 list is larger than the HTML4 list at
15// http://www.w3.org/TR/html4/sgml/entities.html
16var entity = map[string]rune{
17 "AElig;": '\U000000C6',
18 "AMP;": '\U00000026',
19 "Aacute;": '\U000000C1',
20 "Abreve;": '\U00000102',
21 "Acirc;": '\U000000C2',
22 "Acy;": '\U00000410',
23 "Afr;": '\U0001D504',
24 "Agrave;": '\U000000C0',
25 "Alpha;": '\U00000391',
26 "Amacr;": '\U00000100',
27 "And;": '\U00002A53',
28 "Aogon;": '\U00000104',
29 "Aopf;": '\U0001D538',
30 "ApplyFunction;": '\U00002061',
31 "Aring;": '\U000000C5',
32 "Ascr;": '\U0001D49C',
33 "Assign;": '\U00002254',
34 "Atilde;": '\U000000C3',
35 "Auml;": '\U000000C4',
36 "Backslash;": '\U00002216',
37 "Barv;": '\U00002AE7',
38 "Barwed;": '\U00002306',
39 "Bcy;": '\U00000411',
40 "Because;": '\U00002235',
41 "Bernoullis;": '\U0000212C',
42 "Beta;": '\U00000392',
43 "Bfr;": '\U0001D505',
44 "Bopf;": '\U0001D539',
45 "Breve;": '\U000002D8',
46 "Bscr;": '\U0000212C',
47 "Bumpeq;": '\U0000224E',
48 "CHcy;": '\U00000427',
49 "COPY;": '\U000000A9',
50 "Cacute;": '\U00000106',
51 "Cap;": '\U000022D2',
52 "CapitalDifferentialD;": '\U00002145',
53 "Cayleys;": '\U0000212D',
54 "Ccaron;": '\U0000010C',
55 "Ccedil;": '\U000000C7',
56 "Ccirc;": '\U00000108',
57 "Cconint;": '\U00002230',
58 "Cdot;": '\U0000010A',
59 "Cedilla;": '\U000000B8',
60 "CenterDot;": '\U000000B7',
61 "Cfr;": '\U0000212D',
62 "Chi;": '\U000003A7',
63 "CircleDot;": '\U00002299',
64 "CircleMinus;": '\U00002296',
65 "CirclePlus;": '\U00002295',
66 "CircleTimes;": '\U00002297',
67 "ClockwiseContourIntegral;": '\U00002232',
68 "CloseCurlyDoubleQuote;": '\U0000201D',
69 "CloseCurlyQuote;": '\U00002019',
70 "Colon;": '\U00002237',
71 "Colone;": '\U00002A74',
72 "Congruent;": '\U00002261',
73 "Conint;": '\U0000222F',
74 "ContourIntegral;": '\U0000222E',
75 "Copf;": '\U00002102',
76 "Coproduct;": '\U00002210',
77 "CounterClockwiseContourIntegral;": '\U00002233',
78 "Cross;": '\U00002A2F',
79 "Cscr;": '\U0001D49E',
80 "Cup;": '\U000022D3',
81 "CupCap;": '\U0000224D',
82 "DD;": '\U00002145',
83 "DDotrahd;": '\U00002911',
84 "DJcy;": '\U00000402',
85 "DScy;": '\U00000405',
86 "DZcy;": '\U0000040F',
87 "Dagger;": '\U00002021',
88 "Darr;": '\U000021A1',
89 "Dashv;": '\U00002AE4',
90 "Dcaron;": '\U0000010E',
91 "Dcy;": '\U00000414',
92 "Del;": '\U00002207',
93 "Delta;": '\U00000394',
94 "Dfr;": '\U0001D507',
95 "DiacriticalAcute;": '\U000000B4',
96 "DiacriticalDot;": '\U000002D9',
97 "DiacriticalDoubleAcute;": '\U000002DD',
98 "DiacriticalGrave;": '\U00000060',
99 "DiacriticalTilde;": '\U000002DC',
100 "Diamond;": '\U000022C4',
101 "DifferentialD;": '\U00002146',
102 "Dopf;": '\U0001D53B',
103 "Dot;": '\U000000A8',
104 "DotDot;": '\U000020DC',
105 "DotEqual;": '\U00002250',
106 "DoubleContourIntegral;": '\U0000222F',
107 "DoubleDot;": '\U000000A8',
108 "DoubleDownArrow;": '\U000021D3',
109 "DoubleLeftArrow;": '\U000021D0',
110 "DoubleLeftRightArrow;": '\U000021D4',
111 "DoubleLeftTee;": '\U00002AE4',
112 "DoubleLongLeftArrow;": '\U000027F8',
113 "DoubleLongLeftRightArrow;": '\U000027FA',
114 "DoubleLongRightArrow;": '\U000027F9',
115 "DoubleRightArrow;": '\U000021D2',
116 "DoubleRightTee;": '\U000022A8',
117 "DoubleUpArrow;": '\U000021D1',
118 "DoubleUpDownArrow;": '\U000021D5',
119 "DoubleVerticalBar;": '\U00002225',
120 "DownArrow;": '\U00002193',
121 "DownArrowBar;": '\U00002913',
122 "DownArrowUpArrow;": '\U000021F5',
123 "DownBreve;": '\U00000311',
124 "DownLeftRightVector;": '\U00002950',
125 "DownLeftTeeVector;": '\U0000295E',
126 "DownLeftVector;": '\U000021BD',
127 "DownLeftVectorBar;": '\U00002956',
128 "DownRightTeeVector;": '\U0000295F',
129 "DownRightVector;": '\U000021C1',
130 "DownRightVectorBar;": '\U00002957',
131 "DownTee;": '\U000022A4',
132 "DownTeeArrow;": '\U000021A7',
133 "Downarrow;": '\U000021D3',
134 "Dscr;": '\U0001D49F',
135 "Dstrok;": '\U00000110',
136 "ENG;": '\U0000014A',
137 "ETH;": '\U000000D0',
138 "Eacute;": '\U000000C9',
139 "Ecaron;": '\U0000011A',
140 "Ecirc;": '\U000000CA',
141 "Ecy;": '\U0000042D',
142 "Edot;": '\U00000116',
143 "Efr;": '\U0001D508',
144 "Egrave;": '\U000000C8',
145 "Element;": '\U00002208',
146 "Emacr;": '\U00000112',
147 "EmptySmallSquare;": '\U000025FB',
148 "EmptyVerySmallSquare;": '\U000025AB',
149 "Eogon;": '\U00000118',
150 "Eopf;": '\U0001D53C',
151 "Epsilon;": '\U00000395',
152 "Equal;": '\U00002A75',
153 "EqualTilde;": '\U00002242',
154 "Equilibrium;": '\U000021CC',
155 "Escr;": '\U00002130',
156 "Esim;": '\U00002A73',
157 "Eta;": '\U00000397',
158 "Euml;": '\U000000CB',
159 "Exists;": '\U00002203',
160 "ExponentialE;": '\U00002147',
161 "Fcy;": '\U00000424',
162 "Ffr;": '\U0001D509',
163 "FilledSmallSquare;": '\U000025FC',
164 "FilledVerySmallSquare;": '\U000025AA',
165 "Fopf;": '\U0001D53D',
166 "ForAll;": '\U00002200',
167 "Fouriertrf;": '\U00002131',
168 "Fscr;": '\U00002131',
169 "GJcy;": '\U00000403',
170 "GT;": '\U0000003E',
171 "Gamma;": '\U00000393',
172 "Gammad;": '\U000003DC',
173 "Gbreve;": '\U0000011E',
174 "Gcedil;": '\U00000122',
175 "Gcirc;": '\U0000011C',
176 "Gcy;": '\U00000413',
177 "Gdot;": '\U00000120',
178 "Gfr;": '\U0001D50A',
179 "Gg;": '\U000022D9',
180 "Gopf;": '\U0001D53E',
181 "GreaterEqual;": '\U00002265',
182 "GreaterEqualLess;": '\U000022DB',
183 "GreaterFullEqual;": '\U00002267',
184 "GreaterGreater;": '\U00002AA2',
185 "GreaterLess;": '\U00002277',
186 "GreaterSlantEqual;": '\U00002A7E',
187 "GreaterTilde;": '\U00002273',
188 "Gscr;": '\U0001D4A2',
189 "Gt;": '\U0000226B',
190 "HARDcy;": '\U0000042A',
191 "Hacek;": '\U000002C7',
192 "Hat;": '\U0000005E',
193 "Hcirc;": '\U00000124',
194 "Hfr;": '\U0000210C',
195 "HilbertSpace;": '\U0000210B',
196 "Hopf;": '\U0000210D',
197 "HorizontalLine;": '\U00002500',
198 "Hscr;": '\U0000210B',
199 "Hstrok;": '\U00000126',
200 "HumpDownHump;": '\U0000224E',
201 "HumpEqual;": '\U0000224F',
202 "IEcy;": '\U00000415',
203 "IJlig;": '\U00000132',
204 "IOcy;": '\U00000401',
205 "Iacute;": '\U000000CD',
206 "Icirc;": '\U000000CE',
207 "Icy;": '\U00000418',
208 "Idot;": '\U00000130',
209 "Ifr;": '\U00002111',
210 "Igrave;": '\U000000CC',
211 "Im;": '\U00002111',
212 "Imacr;": '\U0000012A',
213 "ImaginaryI;": '\U00002148',
214 "Implies;": '\U000021D2',
215 "Int;": '\U0000222C',
216 "Integral;": '\U0000222B',
217 "Intersection;": '\U000022C2',
218 "InvisibleComma;": '\U00002063',
219 "InvisibleTimes;": '\U00002062',
220 "Iogon;": '\U0000012E',
221 "Iopf;": '\U0001D540',
222 "Iota;": '\U00000399',
223 "Iscr;": '\U00002110',
224 "Itilde;": '\U00000128',
225 "Iukcy;": '\U00000406',
226 "Iuml;": '\U000000CF',
227 "Jcirc;": '\U00000134',
228 "Jcy;": '\U00000419',
229 "Jfr;": '\U0001D50D',
230 "Jopf;": '\U0001D541',
231 "Jscr;": '\U0001D4A5',
232 "Jsercy;": '\U00000408',
233 "Jukcy;": '\U00000404',
234 "KHcy;": '\U00000425',
235 "KJcy;": '\U0000040C',
236 "Kappa;": '\U0000039A',
237 "Kcedil;": '\U00000136',
238 "Kcy;": '\U0000041A',
239 "Kfr;": '\U0001D50E',
240 "Kopf;": '\U0001D542',
241 "Kscr;": '\U0001D4A6',
242 "LJcy;": '\U00000409',
243 "LT;": '\U0000003C',
244 "Lacute;": '\U00000139',
245 "Lambda;": '\U0000039B',
246 "Lang;": '\U000027EA',
247 "Laplacetrf;": '\U00002112',
248 "Larr;": '\U0000219E',
249 "Lcaron;": '\U0000013D',
250 "Lcedil;": '\U0000013B',
251 "Lcy;": '\U0000041B',
252 "LeftAngleBracket;": '\U000027E8',
253 "LeftArrow;": '\U00002190',
254 "LeftArrowBar;": '\U000021E4',
255 "LeftArrowRightArrow;": '\U000021C6',
256 "LeftCeiling;": '\U00002308',
257 "LeftDoubleBracket;": '\U000027E6',
258 "LeftDownTeeVector;": '\U00002961',
259 "LeftDownVector;": '\U000021C3',
260 "LeftDownVectorBar;": '\U00002959',
261 "LeftFloor;": '\U0000230A',
262 "LeftRightArrow;": '\U00002194',
263 "LeftRightVector;": '\U0000294E',
264 "LeftTee;": '\U000022A3',
265 "LeftTeeArrow;": '\U000021A4',
266 "LeftTeeVector;": '\U0000295A',
267 "LeftTriangle;": '\U000022B2',
268 "LeftTriangleBar;": '\U000029CF',
269 "LeftTriangleEqual;": '\U000022B4',
270 "LeftUpDownVector;": '\U00002951',
271 "LeftUpTeeVector;": '\U00002960',
272 "LeftUpVector;": '\U000021BF',
273 "LeftUpVectorBar;": '\U00002958',
274 "LeftVector;": '\U000021BC',
275 "LeftVectorBar;": '\U00002952',
276 "Leftarrow;": '\U000021D0',
277 "Leftrightarrow;": '\U000021D4',
278 "LessEqualGreater;": '\U000022DA',
279 "LessFullEqual;": '\U00002266',
280 "LessGreater;": '\U00002276',
281 "LessLess;": '\U00002AA1',
282 "LessSlantEqual;": '\U00002A7D',
283 "LessTilde;": '\U00002272',
284 "Lfr;": '\U0001D50F',
285 "Ll;": '\U000022D8',
286 "Lleftarrow;": '\U000021DA',
287 "Lmidot;": '\U0000013F',
288 "LongLeftArrow;": '\U000027F5',
289 "LongLeftRightArrow;": '\U000027F7',
290 "LongRightArrow;": '\U000027F6',
291 "Longleftarrow;": '\U000027F8',
292 "Longleftrightarrow;": '\U000027FA',
293 "Longrightarrow;": '\U000027F9',
294 "Lopf;": '\U0001D543',
295 "LowerLeftArrow;": '\U00002199',
296 "LowerRightArrow;": '\U00002198',
297 "Lscr;": '\U00002112',
298 "Lsh;": '\U000021B0',
299 "Lstrok;": '\U00000141',
300 "Lt;": '\U0000226A',
301 "Map;": '\U00002905',
302 "Mcy;": '\U0000041C',
303 "MediumSpace;": '\U0000205F',
304 "Mellintrf;": '\U00002133',
305 "Mfr;": '\U0001D510',
306 "MinusPlus;": '\U00002213',
307 "Mopf;": '\U0001D544',
308 "Mscr;": '\U00002133',
309 "Mu;": '\U0000039C',
310 "NJcy;": '\U0000040A',
311 "Nacute;": '\U00000143',
312 "Ncaron;": '\U00000147',
313 "Ncedil;": '\U00000145',
314 "Ncy;": '\U0000041D',
315 "NegativeMediumSpace;": '\U0000200B',
316 "NegativeThickSpace;": '\U0000200B',
317 "NegativeThinSpace;": '\U0000200B',
318 "NegativeVeryThinSpace;": '\U0000200B',
319 "NestedGreaterGreater;": '\U0000226B',
320 "NestedLessLess;": '\U0000226A',
321 "NewLine;": '\U0000000A',
322 "Nfr;": '\U0001D511',
323 "NoBreak;": '\U00002060',
324 "NonBreakingSpace;": '\U000000A0',
325 "Nopf;": '\U00002115',
326 "Not;": '\U00002AEC',
327 "NotCongruent;": '\U00002262',
328 "NotCupCap;": '\U0000226D',
329 "NotDoubleVerticalBar;": '\U00002226',
330 "NotElement;": '\U00002209',
331 "NotEqual;": '\U00002260',
332 "NotExists;": '\U00002204',
333 "NotGreater;": '\U0000226F',
334 "NotGreaterEqual;": '\U00002271',
335 "NotGreaterLess;": '\U00002279',
336 "NotGreaterTilde;": '\U00002275',
337 "NotLeftTriangle;": '\U000022EA',
338 "NotLeftTriangleEqual;": '\U000022EC',
339 "NotLess;": '\U0000226E',
340 "NotLessEqual;": '\U00002270',
341 "NotLessGreater;": '\U00002278',
342 "NotLessTilde;": '\U00002274',
343 "NotPrecedes;": '\U00002280',
344 "NotPrecedesSlantEqual;": '\U000022E0',
345 "NotReverseElement;": '\U0000220C',
346 "NotRightTriangle;": '\U000022EB',
347 "NotRightTriangleEqual;": '\U000022ED',
348 "NotSquareSubsetEqual;": '\U000022E2',
349 "NotSquareSupersetEqual;": '\U000022E3',
350 "NotSubsetEqual;": '\U00002288',
351 "NotSucceeds;": '\U00002281',
352 "NotSucceedsSlantEqual;": '\U000022E1',
353 "NotSupersetEqual;": '\U00002289',
354 "NotTilde;": '\U00002241',
355 "NotTildeEqual;": '\U00002244',
356 "NotTildeFullEqual;": '\U00002247',
357 "NotTildeTilde;": '\U00002249',
358 "NotVerticalBar;": '\U00002224',
359 "Nscr;": '\U0001D4A9',
360 "Ntilde;": '\U000000D1',
361 "Nu;": '\U0000039D',
362 "OElig;": '\U00000152',
363 "Oacute;": '\U000000D3',
364 "Ocirc;": '\U000000D4',
365 "Ocy;": '\U0000041E',
366 "Odblac;": '\U00000150',
367 "Ofr;": '\U0001D512',
368 "Ograve;": '\U000000D2',
369 "Omacr;": '\U0000014C',
370 "Omega;": '\U000003A9',
371 "Omicron;": '\U0000039F',
372 "Oopf;": '\U0001D546',
373 "OpenCurlyDoubleQuote;": '\U0000201C',
374 "OpenCurlyQuote;": '\U00002018',
375 "Or;": '\U00002A54',
376 "Oscr;": '\U0001D4AA',
377 "Oslash;": '\U000000D8',
378 "Otilde;": '\U000000D5',
379 "Otimes;": '\U00002A37',
380 "Ouml;": '\U000000D6',
381 "OverBar;": '\U0000203E',
382 "OverBrace;": '\U000023DE',
383 "OverBracket;": '\U000023B4',
384 "OverParenthesis;": '\U000023DC',
385 "PartialD;": '\U00002202',
386 "Pcy;": '\U0000041F',
387 "Pfr;": '\U0001D513',
388 "Phi;": '\U000003A6',
389 "Pi;": '\U000003A0',
390 "PlusMinus;": '\U000000B1',
391 "Poincareplane;": '\U0000210C',
392 "Popf;": '\U00002119',
393 "Pr;": '\U00002ABB',
394 "Precedes;": '\U0000227A',
395 "PrecedesEqual;": '\U00002AAF',
396 "PrecedesSlantEqual;": '\U0000227C',
397 "PrecedesTilde;": '\U0000227E',
398 "Prime;": '\U00002033',
399 "Product;": '\U0000220F',
400 "Proportion;": '\U00002237',
401 "Proportional;": '\U0000221D',
402 "Pscr;": '\U0001D4AB',
403 "Psi;": '\U000003A8',
404 "QUOT;": '\U00000022',
405 "Qfr;": '\U0001D514',
406 "Qopf;": '\U0000211A',
407 "Qscr;": '\U0001D4AC',
408 "RBarr;": '\U00002910',
409 "REG;": '\U000000AE',
410 "Racute;": '\U00000154',
411 "Rang;": '\U000027EB',
412 "Rarr;": '\U000021A0',
413 "Rarrtl;": '\U00002916',
414 "Rcaron;": '\U00000158',
415 "Rcedil;": '\U00000156',
416 "Rcy;": '\U00000420',
417 "Re;": '\U0000211C',
418 "ReverseElement;": '\U0000220B',
419 "ReverseEquilibrium;": '\U000021CB',
420 "ReverseUpEquilibrium;": '\U0000296F',
421 "Rfr;": '\U0000211C',
422 "Rho;": '\U000003A1',
423 "RightAngleBracket;": '\U000027E9',
424 "RightArrow;": '\U00002192',
425 "RightArrowBar;": '\U000021E5',
426 "RightArrowLeftArrow;": '\U000021C4',
427 "RightCeiling;": '\U00002309',
428 "RightDoubleBracket;": '\U000027E7',
429 "RightDownTeeVector;": '\U0000295D',
430 "RightDownVector;": '\U000021C2',
431 "RightDownVectorBar;": '\U00002955',
432 "RightFloor;": '\U0000230B',
433 "RightTee;": '\U000022A2',
434 "RightTeeArrow;": '\U000021A6',
435 "RightTeeVector;": '\U0000295B',
436 "RightTriangle;": '\U000022B3',
437 "RightTriangleBar;": '\U000029D0',
438 "RightTriangleEqual;": '\U000022B5',
439 "RightUpDownVector;": '\U0000294F',
440 "RightUpTeeVector;": '\U0000295C',
441 "RightUpVector;": '\U000021BE',
442 "RightUpVectorBar;": '\U00002954',
443 "RightVector;": '\U000021C0',
444 "RightVectorBar;": '\U00002953',
445 "Rightarrow;": '\U000021D2',
446 "Ropf;": '\U0000211D',
447 "RoundImplies;": '\U00002970',
448 "Rrightarrow;": '\U000021DB',
449 "Rscr;": '\U0000211B',
450 "Rsh;": '\U000021B1',
451 "RuleDelayed;": '\U000029F4',
452 "SHCHcy;": '\U00000429',
453 "SHcy;": '\U00000428',
454 "SOFTcy;": '\U0000042C',
455 "Sacute;": '\U0000015A',
456 "Sc;": '\U00002ABC',
457 "Scaron;": '\U00000160',
458 "Scedil;": '\U0000015E',
459 "Scirc;": '\U0000015C',
460 "Scy;": '\U00000421',
461 "Sfr;": '\U0001D516',
462 "ShortDownArrow;": '\U00002193',
463 "ShortLeftArrow;": '\U00002190',
464 "ShortRightArrow;": '\U00002192',
465 "ShortUpArrow;": '\U00002191',
466 "Sigma;": '\U000003A3',
467 "SmallCircle;": '\U00002218',
468 "Sopf;": '\U0001D54A',
469 "Sqrt;": '\U0000221A',
470 "Square;": '\U000025A1',
471 "SquareIntersection;": '\U00002293',
472 "SquareSubset;": '\U0000228F',
473 "SquareSubsetEqual;": '\U00002291',
474 "SquareSuperset;": '\U00002290',
475 "SquareSupersetEqual;": '\U00002292',
476 "SquareUnion;": '\U00002294',
477 "Sscr;": '\U0001D4AE',
478 "Star;": '\U000022C6',
479 "Sub;": '\U000022D0',
480 "Subset;": '\U000022D0',
481 "SubsetEqual;": '\U00002286',
482 "Succeeds;": '\U0000227B',
483 "SucceedsEqual;": '\U00002AB0',
484 "SucceedsSlantEqual;": '\U0000227D',
485 "SucceedsTilde;": '\U0000227F',
486 "SuchThat;": '\U0000220B',
487 "Sum;": '\U00002211',
488 "Sup;": '\U000022D1',
489 "Superset;": '\U00002283',
490 "SupersetEqual;": '\U00002287',
491 "Supset;": '\U000022D1',
492 "THORN;": '\U000000DE',
493 "TRADE;": '\U00002122',
494 "TSHcy;": '\U0000040B',
495 "TScy;": '\U00000426',
496 "Tab;": '\U00000009',
497 "Tau;": '\U000003A4',
498 "Tcaron;": '\U00000164',
499 "Tcedil;": '\U00000162',
500 "Tcy;": '\U00000422',
501 "Tfr;": '\U0001D517',
502 "Therefore;": '\U00002234',
503 "Theta;": '\U00000398',
504 "ThinSpace;": '\U00002009',
505 "Tilde;": '\U0000223C',
506 "TildeEqual;": '\U00002243',
507 "TildeFullEqual;": '\U00002245',
508 "TildeTilde;": '\U00002248',
509 "Topf;": '\U0001D54B',
510 "TripleDot;": '\U000020DB',
511 "Tscr;": '\U0001D4AF',
512 "Tstrok;": '\U00000166',
513 "Uacute;": '\U000000DA',
514 "Uarr;": '\U0000219F',
515 "Uarrocir;": '\U00002949',
516 "Ubrcy;": '\U0000040E',
517 "Ubreve;": '\U0000016C',
518 "Ucirc;": '\U000000DB',
519 "Ucy;": '\U00000423',
520 "Udblac;": '\U00000170',
521 "Ufr;": '\U0001D518',
522 "Ugrave;": '\U000000D9',
523 "Umacr;": '\U0000016A',
524 "UnderBar;": '\U0000005F',
525 "UnderBrace;": '\U000023DF',
526 "UnderBracket;": '\U000023B5',
527 "UnderParenthesis;": '\U000023DD',
528 "Union;": '\U000022C3',
529 "UnionPlus;": '\U0000228E',
530 "Uogon;": '\U00000172',
531 "Uopf;": '\U0001D54C',
532 "UpArrow;": '\U00002191',
533 "UpArrowBar;": '\U00002912',
534 "UpArrowDownArrow;": '\U000021C5',
535 "UpDownArrow;": '\U00002195',
536 "UpEquilibrium;": '\U0000296E',
537 "UpTee;": '\U000022A5',
538 "UpTeeArrow;": '\U000021A5',
539 "Uparrow;": '\U000021D1',
540 "Updownarrow;": '\U000021D5',
541 "UpperLeftArrow;": '\U00002196',
542 "UpperRightArrow;": '\U00002197',
543 "Upsi;": '\U000003D2',
544 "Upsilon;": '\U000003A5',
545 "Uring;": '\U0000016E',
546 "Uscr;": '\U0001D4B0',
547 "Utilde;": '\U00000168',
548 "Uuml;": '\U000000DC',
549 "VDash;": '\U000022AB',
550 "Vbar;": '\U00002AEB',
551 "Vcy;": '\U00000412',
552 "Vdash;": '\U000022A9',
553 "Vdashl;": '\U00002AE6',
554 "Vee;": '\U000022C1',
555 "Verbar;": '\U00002016',
556 "Vert;": '\U00002016',
557 "VerticalBar;": '\U00002223',
558 "VerticalLine;": '\U0000007C',
559 "VerticalSeparator;": '\U00002758',
560 "VerticalTilde;": '\U00002240',
561 "VeryThinSpace;": '\U0000200A',
562 "Vfr;": '\U0001D519',
563 "Vopf;": '\U0001D54D',
564 "Vscr;": '\U0001D4B1',
565 "Vvdash;": '\U000022AA',
566 "Wcirc;": '\U00000174',
567 "Wedge;": '\U000022C0',
568 "Wfr;": '\U0001D51A',
569 "Wopf;": '\U0001D54E',
570 "Wscr;": '\U0001D4B2',
571 "Xfr;": '\U0001D51B',
572 "Xi;": '\U0000039E',
573 "Xopf;": '\U0001D54F',
574 "Xscr;": '\U0001D4B3',
575 "YAcy;": '\U0000042F',
576 "YIcy;": '\U00000407',
577 "YUcy;": '\U0000042E',
578 "Yacute;": '\U000000DD',
579 "Ycirc;": '\U00000176',
580 "Ycy;": '\U0000042B',
581 "Yfr;": '\U0001D51C',
582 "Yopf;": '\U0001D550',
583 "Yscr;": '\U0001D4B4',
584 "Yuml;": '\U00000178',
585 "ZHcy;": '\U00000416',
586 "Zacute;": '\U00000179',
587 "Zcaron;": '\U0000017D',
588 "Zcy;": '\U00000417',
589 "Zdot;": '\U0000017B',
590 "ZeroWidthSpace;": '\U0000200B',
591 "Zeta;": '\U00000396',
592 "Zfr;": '\U00002128',
593 "Zopf;": '\U00002124',
594 "Zscr;": '\U0001D4B5',
595 "aacute;": '\U000000E1',
596 "abreve;": '\U00000103',
597 "ac;": '\U0000223E',
598 "acd;": '\U0000223F',
599 "acirc;": '\U000000E2',
600 "acute;": '\U000000B4',
601 "acy;": '\U00000430',
602 "aelig;": '\U000000E6',
603 "af;": '\U00002061',
604 "afr;": '\U0001D51E',
605 "agrave;": '\U000000E0',
606 "alefsym;": '\U00002135',
607 "aleph;": '\U00002135',
608 "alpha;": '\U000003B1',
609 "amacr;": '\U00000101',
610 "amalg;": '\U00002A3F',
611 "amp;": '\U00000026',
612 "and;": '\U00002227',
613 "andand;": '\U00002A55',
614 "andd;": '\U00002A5C',
615 "andslope;": '\U00002A58',
616 "andv;": '\U00002A5A',
617 "ang;": '\U00002220',
618 "ange;": '\U000029A4',
619 "angle;": '\U00002220',
620 "angmsd;": '\U00002221',
621 "angmsdaa;": '\U000029A8',
622 "angmsdab;": '\U000029A9',
623 "angmsdac;": '\U000029AA',
624 "angmsdad;": '\U000029AB',
625 "angmsdae;": '\U000029AC',
626 "angmsdaf;": '\U000029AD',
627 "angmsdag;": '\U000029AE',
628 "angmsdah;": '\U000029AF',
629 "angrt;": '\U0000221F',
630 "angrtvb;": '\U000022BE',
631 "angrtvbd;": '\U0000299D',
632 "angsph;": '\U00002222',
633 "angst;": '\U000000C5',
634 "angzarr;": '\U0000237C',
635 "aogon;": '\U00000105',
636 "aopf;": '\U0001D552',
637 "ap;": '\U00002248',
638 "apE;": '\U00002A70',
639 "apacir;": '\U00002A6F',
640 "ape;": '\U0000224A',
641 "apid;": '\U0000224B',
642 "apos;": '\U00000027',
643 "approx;": '\U00002248',
644 "approxeq;": '\U0000224A',
645 "aring;": '\U000000E5',
646 "ascr;": '\U0001D4B6',
647 "ast;": '\U0000002A',
648 "asymp;": '\U00002248',
649 "asympeq;": '\U0000224D',
650 "atilde;": '\U000000E3',
651 "auml;": '\U000000E4',
652 "awconint;": '\U00002233',
653 "awint;": '\U00002A11',
654 "bNot;": '\U00002AED',
655 "backcong;": '\U0000224C',
656 "backepsilon;": '\U000003F6',
657 "backprime;": '\U00002035',
658 "backsim;": '\U0000223D',
659 "backsimeq;": '\U000022CD',
660 "barvee;": '\U000022BD',
661 "barwed;": '\U00002305',
662 "barwedge;": '\U00002305',
663 "bbrk;": '\U000023B5',
664 "bbrktbrk;": '\U000023B6',
665 "bcong;": '\U0000224C',
666 "bcy;": '\U00000431',
667 "bdquo;": '\U0000201E',
668 "becaus;": '\U00002235',
669 "because;": '\U00002235',
670 "bemptyv;": '\U000029B0',
671 "bepsi;": '\U000003F6',
672 "bernou;": '\U0000212C',
673 "beta;": '\U000003B2',
674 "beth;": '\U00002136',
675 "between;": '\U0000226C',
676 "bfr;": '\U0001D51F',
677 "bigcap;": '\U000022C2',
678 "bigcirc;": '\U000025EF',
679 "bigcup;": '\U000022C3',
680 "bigodot;": '\U00002A00',
681 "bigoplus;": '\U00002A01',
682 "bigotimes;": '\U00002A02',
683 "bigsqcup;": '\U00002A06',
684 "bigstar;": '\U00002605',
685 "bigtriangledown;": '\U000025BD',
686 "bigtriangleup;": '\U000025B3',
687 "biguplus;": '\U00002A04',
688 "bigvee;": '\U000022C1',
689 "bigwedge;": '\U000022C0',
690 "bkarow;": '\U0000290D',
691 "blacklozenge;": '\U000029EB',
692 "blacksquare;": '\U000025AA',
693 "blacktriangle;": '\U000025B4',
694 "blacktriangledown;": '\U000025BE',
695 "blacktriangleleft;": '\U000025C2',
696 "blacktriangleright;": '\U000025B8',
697 "blank;": '\U00002423',
698 "blk12;": '\U00002592',
699 "blk14;": '\U00002591',
700 "blk34;": '\U00002593',
701 "block;": '\U00002588',
702 "bnot;": '\U00002310',
703 "bopf;": '\U0001D553',
704 "bot;": '\U000022A5',
705 "bottom;": '\U000022A5',
706 "bowtie;": '\U000022C8',
707 "boxDL;": '\U00002557',
708 "boxDR;": '\U00002554',
709 "boxDl;": '\U00002556',
710 "boxDr;": '\U00002553',
711 "boxH;": '\U00002550',
712 "boxHD;": '\U00002566',
713 "boxHU;": '\U00002569',
714 "boxHd;": '\U00002564',
715 "boxHu;": '\U00002567',
716 "boxUL;": '\U0000255D',
717 "boxUR;": '\U0000255A',
718 "boxUl;": '\U0000255C',
719 "boxUr;": '\U00002559',
720 "boxV;": '\U00002551',
721 "boxVH;": '\U0000256C',
722 "boxVL;": '\U00002563',
723 "boxVR;": '\U00002560',
724 "boxVh;": '\U0000256B',
725 "boxVl;": '\U00002562',
726 "boxVr;": '\U0000255F',
727 "boxbox;": '\U000029C9',
728 "boxdL;": '\U00002555',
729 "boxdR;": '\U00002552',
730 "boxdl;": '\U00002510',
731 "boxdr;": '\U0000250C',
732 "boxh;": '\U00002500',
733 "boxhD;": '\U00002565',
734 "boxhU;": '\U00002568',
735 "boxhd;": '\U0000252C',
736 "boxhu;": '\U00002534',
737 "boxminus;": '\U0000229F',
738 "boxplus;": '\U0000229E',
739 "boxtimes;": '\U000022A0',
740 "boxuL;": '\U0000255B',
741 "boxuR;": '\U00002558',
742 "boxul;": '\U00002518',
743 "boxur;": '\U00002514',
744 "boxv;": '\U00002502',
745 "boxvH;": '\U0000256A',
746 "boxvL;": '\U00002561',
747 "boxvR;": '\U0000255E',
748 "boxvh;": '\U0000253C',
749 "boxvl;": '\U00002524',
750 "boxvr;": '\U0000251C',
751 "bprime;": '\U00002035',
752 "breve;": '\U000002D8',
753 "brvbar;": '\U000000A6',
754 "bscr;": '\U0001D4B7',
755 "bsemi;": '\U0000204F',
756 "bsim;": '\U0000223D',
757 "bsime;": '\U000022CD',
758 "bsol;": '\U0000005C',
759 "bsolb;": '\U000029C5',
760 "bsolhsub;": '\U000027C8',
761 "bull;": '\U00002022',
762 "bullet;": '\U00002022',
763 "bump;": '\U0000224E',
764 "bumpE;": '\U00002AAE',
765 "bumpe;": '\U0000224F',
766 "bumpeq;": '\U0000224F',
767 "cacute;": '\U00000107',
768 "cap;": '\U00002229',
769 "capand;": '\U00002A44',
770 "capbrcup;": '\U00002A49',
771 "capcap;": '\U00002A4B',
772 "capcup;": '\U00002A47',
773 "capdot;": '\U00002A40',
774 "caret;": '\U00002041',
775 "caron;": '\U000002C7',
776 "ccaps;": '\U00002A4D',
777 "ccaron;": '\U0000010D',
778 "ccedil;": '\U000000E7',
779 "ccirc;": '\U00000109',
780 "ccups;": '\U00002A4C',
781 "ccupssm;": '\U00002A50',
782 "cdot;": '\U0000010B',
783 "cedil;": '\U000000B8',
784 "cemptyv;": '\U000029B2',
785 "cent;": '\U000000A2',
786 "centerdot;": '\U000000B7',
787 "cfr;": '\U0001D520',
788 "chcy;": '\U00000447',
789 "check;": '\U00002713',
790 "checkmark;": '\U00002713',
791 "chi;": '\U000003C7',
792 "cir;": '\U000025CB',
793 "cirE;": '\U000029C3',
794 "circ;": '\U000002C6',
795 "circeq;": '\U00002257',
796 "circlearrowleft;": '\U000021BA',
797 "circlearrowright;": '\U000021BB',
798 "circledR;": '\U000000AE',
799 "circledS;": '\U000024C8',
800 "circledast;": '\U0000229B',
801 "circledcirc;": '\U0000229A',
802 "circleddash;": '\U0000229D',
803 "cire;": '\U00002257',
804 "cirfnint;": '\U00002A10',
805 "cirmid;": '\U00002AEF',
806 "cirscir;": '\U000029C2',
807 "clubs;": '\U00002663',
808 "clubsuit;": '\U00002663',
809 "colon;": '\U0000003A',
810 "colone;": '\U00002254',
811 "coloneq;": '\U00002254',
812 "comma;": '\U0000002C',
813 "commat;": '\U00000040',
814 "comp;": '\U00002201',
815 "compfn;": '\U00002218',
816 "complement;": '\U00002201',
817 "complexes;": '\U00002102',
818 "cong;": '\U00002245',
819 "congdot;": '\U00002A6D',
820 "conint;": '\U0000222E',
821 "copf;": '\U0001D554',
822 "coprod;": '\U00002210',
823 "copy;": '\U000000A9',
824 "copysr;": '\U00002117',
825 "crarr;": '\U000021B5',
826 "cross;": '\U00002717',
827 "cscr;": '\U0001D4B8',
828 "csub;": '\U00002ACF',
829 "csube;": '\U00002AD1',
830 "csup;": '\U00002AD0',
831 "csupe;": '\U00002AD2',
832 "ctdot;": '\U000022EF',
833 "cudarrl;": '\U00002938',
834 "cudarrr;": '\U00002935',
835 "cuepr;": '\U000022DE',
836 "cuesc;": '\U000022DF',
837 "cularr;": '\U000021B6',
838 "cularrp;": '\U0000293D',
839 "cup;": '\U0000222A',
840 "cupbrcap;": '\U00002A48',
841 "cupcap;": '\U00002A46',
842 "cupcup;": '\U00002A4A',
843 "cupdot;": '\U0000228D',
844 "cupor;": '\U00002A45',
845 "curarr;": '\U000021B7',
846 "curarrm;": '\U0000293C',
847 "curlyeqprec;": '\U000022DE',
848 "curlyeqsucc;": '\U000022DF',
849 "curlyvee;": '\U000022CE',
850 "curlywedge;": '\U000022CF',
851 "curren;": '\U000000A4',
852 "curvearrowleft;": '\U000021B6',
853 "curvearrowright;": '\U000021B7',
854 "cuvee;": '\U000022CE',
855 "cuwed;": '\U000022CF',
856 "cwconint;": '\U00002232',
857 "cwint;": '\U00002231',
858 "cylcty;": '\U0000232D',
859 "dArr;": '\U000021D3',
860 "dHar;": '\U00002965',
861 "dagger;": '\U00002020',
862 "daleth;": '\U00002138',
863 "darr;": '\U00002193',
864 "dash;": '\U00002010',
865 "dashv;": '\U000022A3',
866 "dbkarow;": '\U0000290F',
867 "dblac;": '\U000002DD',
868 "dcaron;": '\U0000010F',
869 "dcy;": '\U00000434',
870 "dd;": '\U00002146',
871 "ddagger;": '\U00002021',
872 "ddarr;": '\U000021CA',
873 "ddotseq;": '\U00002A77',
874 "deg;": '\U000000B0',
875 "delta;": '\U000003B4',
876 "demptyv;": '\U000029B1',
877 "dfisht;": '\U0000297F',
878 "dfr;": '\U0001D521',
879 "dharl;": '\U000021C3',
880 "dharr;": '\U000021C2',
881 "diam;": '\U000022C4',
882 "diamond;": '\U000022C4',
883 "diamondsuit;": '\U00002666',
884 "diams;": '\U00002666',
885 "die;": '\U000000A8',
886 "digamma;": '\U000003DD',
887 "disin;": '\U000022F2',
888 "div;": '\U000000F7',
889 "divide;": '\U000000F7',
890 "divideontimes;": '\U000022C7',
891 "divonx;": '\U000022C7',
892 "djcy;": '\U00000452',
893 "dlcorn;": '\U0000231E',
894 "dlcrop;": '\U0000230D',
895 "dollar;": '\U00000024',
896 "dopf;": '\U0001D555',
897 "dot;": '\U000002D9',
898 "doteq;": '\U00002250',
899 "doteqdot;": '\U00002251',
900 "dotminus;": '\U00002238',
901 "dotplus;": '\U00002214',
902 "dotsquare;": '\U000022A1',
903 "doublebarwedge;": '\U00002306',
904 "downarrow;": '\U00002193',
905 "downdownarrows;": '\U000021CA',
906 "downharpoonleft;": '\U000021C3',
907 "downharpoonright;": '\U000021C2',
908 "drbkarow;": '\U00002910',
909 "drcorn;": '\U0000231F',
910 "drcrop;": '\U0000230C',
911 "dscr;": '\U0001D4B9',
912 "dscy;": '\U00000455',
913 "dsol;": '\U000029F6',
914 "dstrok;": '\U00000111',
915 "dtdot;": '\U000022F1',
916 "dtri;": '\U000025BF',
917 "dtrif;": '\U000025BE',
918 "duarr;": '\U000021F5',
919 "duhar;": '\U0000296F',
920 "dwangle;": '\U000029A6',
921 "dzcy;": '\U0000045F',
922 "dzigrarr;": '\U000027FF',
923 "eDDot;": '\U00002A77',
924 "eDot;": '\U00002251',
925 "eacute;": '\U000000E9',
926 "easter;": '\U00002A6E',
927 "ecaron;": '\U0000011B',
928 "ecir;": '\U00002256',
929 "ecirc;": '\U000000EA',
930 "ecolon;": '\U00002255',
931 "ecy;": '\U0000044D',
932 "edot;": '\U00000117',
933 "ee;": '\U00002147',
934 "efDot;": '\U00002252',
935 "efr;": '\U0001D522',
936 "eg;": '\U00002A9A',
937 "egrave;": '\U000000E8',
938 "egs;": '\U00002A96',
939 "egsdot;": '\U00002A98',
940 "el;": '\U00002A99',
941 "elinters;": '\U000023E7',
942 "ell;": '\U00002113',
943 "els;": '\U00002A95',
944 "elsdot;": '\U00002A97',
945 "emacr;": '\U00000113',
946 "empty;": '\U00002205',
947 "emptyset;": '\U00002205',
948 "emptyv;": '\U00002205',
949 "emsp;": '\U00002003',
950 "emsp13;": '\U00002004',
951 "emsp14;": '\U00002005',
952 "eng;": '\U0000014B',
953 "ensp;": '\U00002002',
954 "eogon;": '\U00000119',
955 "eopf;": '\U0001D556',
956 "epar;": '\U000022D5',
957 "eparsl;": '\U000029E3',
958 "eplus;": '\U00002A71',
959 "epsi;": '\U000003B5',
960 "epsilon;": '\U000003B5',
961 "epsiv;": '\U000003F5',
962 "eqcirc;": '\U00002256',
963 "eqcolon;": '\U00002255',
964 "eqsim;": '\U00002242',
965 "eqslantgtr;": '\U00002A96',
966 "eqslantless;": '\U00002A95',
967 "equals;": '\U0000003D',
968 "equest;": '\U0000225F',
969 "equiv;": '\U00002261',
970 "equivDD;": '\U00002A78',
971 "eqvparsl;": '\U000029E5',
972 "erDot;": '\U00002253',
973 "erarr;": '\U00002971',
974 "escr;": '\U0000212F',
975 "esdot;": '\U00002250',
976 "esim;": '\U00002242',
977 "eta;": '\U000003B7',
978 "eth;": '\U000000F0',
979 "euml;": '\U000000EB',
980 "euro;": '\U000020AC',
981 "excl;": '\U00000021',
982 "exist;": '\U00002203',
983 "expectation;": '\U00002130',
984 "exponentiale;": '\U00002147',
985 "fallingdotseq;": '\U00002252',
986 "fcy;": '\U00000444',
987 "female;": '\U00002640',
988 "ffilig;": '\U0000FB03',
989 "fflig;": '\U0000FB00',
990 "ffllig;": '\U0000FB04',
991 "ffr;": '\U0001D523',
992 "filig;": '\U0000FB01',
993 "flat;": '\U0000266D',
994 "fllig;": '\U0000FB02',
995 "fltns;": '\U000025B1',
996 "fnof;": '\U00000192',
997 "fopf;": '\U0001D557',
998 "forall;": '\U00002200',
999 "fork;": '\U000022D4',
1000 "forkv;": '\U00002AD9',
1001 "fpartint;": '\U00002A0D',
1002 "frac12;": '\U000000BD',
1003 "frac13;": '\U00002153',
1004 "frac14;": '\U000000BC',
1005 "frac15;": '\U00002155',
1006 "frac16;": '\U00002159',
1007 "frac18;": '\U0000215B',
1008 "frac23;": '\U00002154',
1009 "frac25;": '\U00002156',
1010 "frac34;": '\U000000BE',
1011 "frac35;": '\U00002157',
1012 "frac38;": '\U0000215C',
1013 "frac45;": '\U00002158',
1014 "frac56;": '\U0000215A',
1015 "frac58;": '\U0000215D',
1016 "frac78;": '\U0000215E',
1017 "frasl;": '\U00002044',
1018 "frown;": '\U00002322',
1019 "fscr;": '\U0001D4BB',
1020 "gE;": '\U00002267',
1021 "gEl;": '\U00002A8C',
1022 "gacute;": '\U000001F5',
1023 "gamma;": '\U000003B3',
1024 "gammad;": '\U000003DD',
1025 "gap;": '\U00002A86',
1026 "gbreve;": '\U0000011F',
1027 "gcirc;": '\U0000011D',
1028 "gcy;": '\U00000433',
1029 "gdot;": '\U00000121',
1030 "ge;": '\U00002265',
1031 "gel;": '\U000022DB',
1032 "geq;": '\U00002265',
1033 "geqq;": '\U00002267',
1034 "geqslant;": '\U00002A7E',
1035 "ges;": '\U00002A7E',
1036 "gescc;": '\U00002AA9',
1037 "gesdot;": '\U00002A80',
1038 "gesdoto;": '\U00002A82',
1039 "gesdotol;": '\U00002A84',
1040 "gesles;": '\U00002A94',
1041 "gfr;": '\U0001D524',
1042 "gg;": '\U0000226B',
1043 "ggg;": '\U000022D9',
1044 "gimel;": '\U00002137',
1045 "gjcy;": '\U00000453',
1046 "gl;": '\U00002277',
1047 "glE;": '\U00002A92',
1048 "gla;": '\U00002AA5',
1049 "glj;": '\U00002AA4',
1050 "gnE;": '\U00002269',
1051 "gnap;": '\U00002A8A',
1052 "gnapprox;": '\U00002A8A',
1053 "gne;": '\U00002A88',
1054 "gneq;": '\U00002A88',
1055 "gneqq;": '\U00002269',
1056 "gnsim;": '\U000022E7',
1057 "gopf;": '\U0001D558',
1058 "grave;": '\U00000060',
1059 "gscr;": '\U0000210A',
1060 "gsim;": '\U00002273',
1061 "gsime;": '\U00002A8E',
1062 "gsiml;": '\U00002A90',
1063 "gt;": '\U0000003E',
1064 "gtcc;": '\U00002AA7',
1065 "gtcir;": '\U00002A7A',
1066 "gtdot;": '\U000022D7',
1067 "gtlPar;": '\U00002995',
1068 "gtquest;": '\U00002A7C',
1069 "gtrapprox;": '\U00002A86',
1070 "gtrarr;": '\U00002978',
1071 "gtrdot;": '\U000022D7',
1072 "gtreqless;": '\U000022DB',
1073 "gtreqqless;": '\U00002A8C',
1074 "gtrless;": '\U00002277',
1075 "gtrsim;": '\U00002273',
1076 "hArr;": '\U000021D4',
1077 "hairsp;": '\U0000200A',
1078 "half;": '\U000000BD',
1079 "hamilt;": '\U0000210B',
1080 "hardcy;": '\U0000044A',
1081 "harr;": '\U00002194',
1082 "harrcir;": '\U00002948',
1083 "harrw;": '\U000021AD',
1084 "hbar;": '\U0000210F',
1085 "hcirc;": '\U00000125',
1086 "hearts;": '\U00002665',
1087 "heartsuit;": '\U00002665',
1088 "hellip;": '\U00002026',
1089 "hercon;": '\U000022B9',
1090 "hfr;": '\U0001D525',
1091 "hksearow;": '\U00002925',
1092 "hkswarow;": '\U00002926',
1093 "hoarr;": '\U000021FF',
1094 "homtht;": '\U0000223B',
1095 "hookleftarrow;": '\U000021A9',
1096 "hookrightarrow;": '\U000021AA',
1097 "hopf;": '\U0001D559',
1098 "horbar;": '\U00002015',
1099 "hscr;": '\U0001D4BD',
1100 "hslash;": '\U0000210F',
1101 "hstrok;": '\U00000127',
1102 "hybull;": '\U00002043',
1103 "hyphen;": '\U00002010',
1104 "iacute;": '\U000000ED',
1105 "ic;": '\U00002063',
1106 "icirc;": '\U000000EE',
1107 "icy;": '\U00000438',
1108 "iecy;": '\U00000435',
1109 "iexcl;": '\U000000A1',
1110 "iff;": '\U000021D4',
1111 "ifr;": '\U0001D526',
1112 "igrave;": '\U000000EC',
1113 "ii;": '\U00002148',
1114 "iiiint;": '\U00002A0C',
1115 "iiint;": '\U0000222D',
1116 "iinfin;": '\U000029DC',
1117 "iiota;": '\U00002129',
1118 "ijlig;": '\U00000133',
1119 "imacr;": '\U0000012B',
1120 "image;": '\U00002111',
1121 "imagline;": '\U00002110',
1122 "imagpart;": '\U00002111',
1123 "imath;": '\U00000131',
1124 "imof;": '\U000022B7',
1125 "imped;": '\U000001B5',
1126 "in;": '\U00002208',
1127 "incare;": '\U00002105',
1128 "infin;": '\U0000221E',
1129 "infintie;": '\U000029DD',
1130 "inodot;": '\U00000131',
1131 "int;": '\U0000222B',
1132 "intcal;": '\U000022BA',
1133 "integers;": '\U00002124',
1134 "intercal;": '\U000022BA',
1135 "intlarhk;": '\U00002A17',
1136 "intprod;": '\U00002A3C',
1137 "iocy;": '\U00000451',
1138 "iogon;": '\U0000012F',
1139 "iopf;": '\U0001D55A',
1140 "iota;": '\U000003B9',
1141 "iprod;": '\U00002A3C',
1142 "iquest;": '\U000000BF',
1143 "iscr;": '\U0001D4BE',
1144 "isin;": '\U00002208',
1145 "isinE;": '\U000022F9',
1146 "isindot;": '\U000022F5',
1147 "isins;": '\U000022F4',
1148 "isinsv;": '\U000022F3',
1149 "isinv;": '\U00002208',
1150 "it;": '\U00002062',
1151 "itilde;": '\U00000129',
1152 "iukcy;": '\U00000456',
1153 "iuml;": '\U000000EF',
1154 "jcirc;": '\U00000135',
1155 "jcy;": '\U00000439',
1156 "jfr;": '\U0001D527',
1157 "jmath;": '\U00000237',
1158 "jopf;": '\U0001D55B',
1159 "jscr;": '\U0001D4BF',
1160 "jsercy;": '\U00000458',
1161 "jukcy;": '\U00000454',
1162 "kappa;": '\U000003BA',
1163 "kappav;": '\U000003F0',
1164 "kcedil;": '\U00000137',
1165 "kcy;": '\U0000043A',
1166 "kfr;": '\U0001D528',
1167 "kgreen;": '\U00000138',
1168 "khcy;": '\U00000445',
1169 "kjcy;": '\U0000045C',
1170 "kopf;": '\U0001D55C',
1171 "kscr;": '\U0001D4C0',
1172 "lAarr;": '\U000021DA',
1173 "lArr;": '\U000021D0',
1174 "lAtail;": '\U0000291B',
1175 "lBarr;": '\U0000290E',
1176 "lE;": '\U00002266',
1177 "lEg;": '\U00002A8B',
1178 "lHar;": '\U00002962',
1179 "lacute;": '\U0000013A',
1180 "laemptyv;": '\U000029B4',
1181 "lagran;": '\U00002112',
1182 "lambda;": '\U000003BB',
1183 "lang;": '\U000027E8',
1184 "langd;": '\U00002991',
1185 "langle;": '\U000027E8',
1186 "lap;": '\U00002A85',
1187 "laquo;": '\U000000AB',
1188 "larr;": '\U00002190',
1189 "larrb;": '\U000021E4',
1190 "larrbfs;": '\U0000291F',
1191 "larrfs;": '\U0000291D',
1192 "larrhk;": '\U000021A9',
1193 "larrlp;": '\U000021AB',
1194 "larrpl;": '\U00002939',
1195 "larrsim;": '\U00002973',
1196 "larrtl;": '\U000021A2',
1197 "lat;": '\U00002AAB',
1198 "latail;": '\U00002919',
1199 "late;": '\U00002AAD',
1200 "lbarr;": '\U0000290C',
1201 "lbbrk;": '\U00002772',
1202 "lbrace;": '\U0000007B',
1203 "lbrack;": '\U0000005B',
1204 "lbrke;": '\U0000298B',
1205 "lbrksld;": '\U0000298F',
1206 "lbrkslu;": '\U0000298D',
1207 "lcaron;": '\U0000013E',
1208 "lcedil;": '\U0000013C',
1209 "lceil;": '\U00002308',
1210 "lcub;": '\U0000007B',
1211 "lcy;": '\U0000043B',
1212 "ldca;": '\U00002936',
1213 "ldquo;": '\U0000201C',
1214 "ldquor;": '\U0000201E',
1215 "ldrdhar;": '\U00002967',
1216 "ldrushar;": '\U0000294B',
1217 "ldsh;": '\U000021B2',
1218 "le;": '\U00002264',
1219 "leftarrow;": '\U00002190',
1220 "leftarrowtail;": '\U000021A2',
1221 "leftharpoondown;": '\U000021BD',
1222 "leftharpoonup;": '\U000021BC',
1223 "leftleftarrows;": '\U000021C7',
1224 "leftrightarrow;": '\U00002194',
1225 "leftrightarrows;": '\U000021C6',
1226 "leftrightharpoons;": '\U000021CB',
1227 "leftrightsquigarrow;": '\U000021AD',
1228 "leftthreetimes;": '\U000022CB',
1229 "leg;": '\U000022DA',
1230 "leq;": '\U00002264',
1231 "leqq;": '\U00002266',
1232 "leqslant;": '\U00002A7D',
1233 "les;": '\U00002A7D',
1234 "lescc;": '\U00002AA8',
1235 "lesdot;": '\U00002A7F',
1236 "lesdoto;": '\U00002A81',
1237 "lesdotor;": '\U00002A83',
1238 "lesges;": '\U00002A93',
1239 "lessapprox;": '\U00002A85',
1240 "lessdot;": '\U000022D6',
1241 "lesseqgtr;": '\U000022DA',
1242 "lesseqqgtr;": '\U00002A8B',
1243 "lessgtr;": '\U00002276',
1244 "lesssim;": '\U00002272',
1245 "lfisht;": '\U0000297C',
1246 "lfloor;": '\U0000230A',
1247 "lfr;": '\U0001D529',
1248 "lg;": '\U00002276',
1249 "lgE;": '\U00002A91',
1250 "lhard;": '\U000021BD',
1251 "lharu;": '\U000021BC',
1252 "lharul;": '\U0000296A',
1253 "lhblk;": '\U00002584',
1254 "ljcy;": '\U00000459',
1255 "ll;": '\U0000226A',
1256 "llarr;": '\U000021C7',
1257 "llcorner;": '\U0000231E',
1258 "llhard;": '\U0000296B',
1259 "lltri;": '\U000025FA',
1260 "lmidot;": '\U00000140',
1261 "lmoust;": '\U000023B0',
1262 "lmoustache;": '\U000023B0',
1263 "lnE;": '\U00002268',
1264 "lnap;": '\U00002A89',
1265 "lnapprox;": '\U00002A89',
1266 "lne;": '\U00002A87',
1267 "lneq;": '\U00002A87',
1268 "lneqq;": '\U00002268',
1269 "lnsim;": '\U000022E6',
1270 "loang;": '\U000027EC',
1271 "loarr;": '\U000021FD',
1272 "lobrk;": '\U000027E6',
1273 "longleftarrow;": '\U000027F5',
1274 "longleftrightarrow;": '\U000027F7',
1275 "longmapsto;": '\U000027FC',
1276 "longrightarrow;": '\U000027F6',
1277 "looparrowleft;": '\U000021AB',
1278 "looparrowright;": '\U000021AC',
1279 "lopar;": '\U00002985',
1280 "lopf;": '\U0001D55D',
1281 "loplus;": '\U00002A2D',
1282 "lotimes;": '\U00002A34',
1283 "lowast;": '\U00002217',
1284 "lowbar;": '\U0000005F',
1285 "loz;": '\U000025CA',
1286 "lozenge;": '\U000025CA',
1287 "lozf;": '\U000029EB',
1288 "lpar;": '\U00000028',
1289 "lparlt;": '\U00002993',
1290 "lrarr;": '\U000021C6',
1291 "lrcorner;": '\U0000231F',
1292 "lrhar;": '\U000021CB',
1293 "lrhard;": '\U0000296D',
1294 "lrm;": '\U0000200E',
1295 "lrtri;": '\U000022BF',
1296 "lsaquo;": '\U00002039',
1297 "lscr;": '\U0001D4C1',
1298 "lsh;": '\U000021B0',
1299 "lsim;": '\U00002272',
1300 "lsime;": '\U00002A8D',
1301 "lsimg;": '\U00002A8F',
1302 "lsqb;": '\U0000005B',
1303 "lsquo;": '\U00002018',
1304 "lsquor;": '\U0000201A',
1305 "lstrok;": '\U00000142',
1306 "lt;": '\U0000003C',
1307 "ltcc;": '\U00002AA6',
1308 "ltcir;": '\U00002A79',
1309 "ltdot;": '\U000022D6',
1310 "lthree;": '\U000022CB',
1311 "ltimes;": '\U000022C9',
1312 "ltlarr;": '\U00002976',
1313 "ltquest;": '\U00002A7B',
1314 "ltrPar;": '\U00002996',
1315 "ltri;": '\U000025C3',
1316 "ltrie;": '\U000022B4',
1317 "ltrif;": '\U000025C2',
1318 "lurdshar;": '\U0000294A',
1319 "luruhar;": '\U00002966',
1320 "mDDot;": '\U0000223A',
1321 "macr;": '\U000000AF',
1322 "male;": '\U00002642',
1323 "malt;": '\U00002720',
1324 "maltese;": '\U00002720',
1325 "map;": '\U000021A6',
1326 "mapsto;": '\U000021A6',
1327 "mapstodown;": '\U000021A7',
1328 "mapstoleft;": '\U000021A4',
1329 "mapstoup;": '\U000021A5',
1330 "marker;": '\U000025AE',
1331 "mcomma;": '\U00002A29',
1332 "mcy;": '\U0000043C',
1333 "mdash;": '\U00002014',
1334 "measuredangle;": '\U00002221',
1335 "mfr;": '\U0001D52A',
1336 "mho;": '\U00002127',
1337 "micro;": '\U000000B5',
1338 "mid;": '\U00002223',
1339 "midast;": '\U0000002A',
1340 "midcir;": '\U00002AF0',
1341 "middot;": '\U000000B7',
1342 "minus;": '\U00002212',
1343 "minusb;": '\U0000229F',
1344 "minusd;": '\U00002238',
1345 "minusdu;": '\U00002A2A',
1346 "mlcp;": '\U00002ADB',
1347 "mldr;": '\U00002026',
1348 "mnplus;": '\U00002213',
1349 "models;": '\U000022A7',
1350 "mopf;": '\U0001D55E',
1351 "mp;": '\U00002213',
1352 "mscr;": '\U0001D4C2',
1353 "mstpos;": '\U0000223E',
1354 "mu;": '\U000003BC',
1355 "multimap;": '\U000022B8',
1356 "mumap;": '\U000022B8',
1357 "nLeftarrow;": '\U000021CD',
1358 "nLeftrightarrow;": '\U000021CE',
1359 "nRightarrow;": '\U000021CF',
1360 "nVDash;": '\U000022AF',
1361 "nVdash;": '\U000022AE',
1362 "nabla;": '\U00002207',
1363 "nacute;": '\U00000144',
1364 "nap;": '\U00002249',
1365 "napos;": '\U00000149',
1366 "napprox;": '\U00002249',
1367 "natur;": '\U0000266E',
1368 "natural;": '\U0000266E',
1369 "naturals;": '\U00002115',
1370 "nbsp;": '\U000000A0',
1371 "ncap;": '\U00002A43',
1372 "ncaron;": '\U00000148',
1373 "ncedil;": '\U00000146',
1374 "ncong;": '\U00002247',
1375 "ncup;": '\U00002A42',
1376 "ncy;": '\U0000043D',
1377 "ndash;": '\U00002013',
1378 "ne;": '\U00002260',
1379 "neArr;": '\U000021D7',
1380 "nearhk;": '\U00002924',
1381 "nearr;": '\U00002197',
1382 "nearrow;": '\U00002197',
1383 "nequiv;": '\U00002262',
1384 "nesear;": '\U00002928',
1385 "nexist;": '\U00002204',
1386 "nexists;": '\U00002204',
1387 "nfr;": '\U0001D52B',
1388 "nge;": '\U00002271',
1389 "ngeq;": '\U00002271',
1390 "ngsim;": '\U00002275',
1391 "ngt;": '\U0000226F',
1392 "ngtr;": '\U0000226F',
1393 "nhArr;": '\U000021CE',
1394 "nharr;": '\U000021AE',
1395 "nhpar;": '\U00002AF2',
1396 "ni;": '\U0000220B',
1397 "nis;": '\U000022FC',
1398 "nisd;": '\U000022FA',
1399 "niv;": '\U0000220B',
1400 "njcy;": '\U0000045A',
1401 "nlArr;": '\U000021CD',
1402 "nlarr;": '\U0000219A',
1403 "nldr;": '\U00002025',
1404 "nle;": '\U00002270',
1405 "nleftarrow;": '\U0000219A',
1406 "nleftrightarrow;": '\U000021AE',
1407 "nleq;": '\U00002270',
1408 "nless;": '\U0000226E',
1409 "nlsim;": '\U00002274',
1410 "nlt;": '\U0000226E',
1411 "nltri;": '\U000022EA',
1412 "nltrie;": '\U000022EC',
1413 "nmid;": '\U00002224',
1414 "nopf;": '\U0001D55F',
1415 "not;": '\U000000AC',
1416 "notin;": '\U00002209',
1417 "notinva;": '\U00002209',
1418 "notinvb;": '\U000022F7',
1419 "notinvc;": '\U000022F6',
1420 "notni;": '\U0000220C',
1421 "notniva;": '\U0000220C',
1422 "notnivb;": '\U000022FE',
1423 "notnivc;": '\U000022FD',
1424 "npar;": '\U00002226',
1425 "nparallel;": '\U00002226',
1426 "npolint;": '\U00002A14',
1427 "npr;": '\U00002280',
1428 "nprcue;": '\U000022E0',
1429 "nprec;": '\U00002280',
1430 "nrArr;": '\U000021CF',
1431 "nrarr;": '\U0000219B',
1432 "nrightarrow;": '\U0000219B',
1433 "nrtri;": '\U000022EB',
1434 "nrtrie;": '\U000022ED',
1435 "nsc;": '\U00002281',
1436 "nsccue;": '\U000022E1',
1437 "nscr;": '\U0001D4C3',
1438 "nshortmid;": '\U00002224',
1439 "nshortparallel;": '\U00002226',
1440 "nsim;": '\U00002241',
1441 "nsime;": '\U00002244',
1442 "nsimeq;": '\U00002244',
1443 "nsmid;": '\U00002224',
1444 "nspar;": '\U00002226',
1445 "nsqsube;": '\U000022E2',
1446 "nsqsupe;": '\U000022E3',
1447 "nsub;": '\U00002284',
1448 "nsube;": '\U00002288',
1449 "nsubseteq;": '\U00002288',
1450 "nsucc;": '\U00002281',
1451 "nsup;": '\U00002285',
1452 "nsupe;": '\U00002289',
1453 "nsupseteq;": '\U00002289',
1454 "ntgl;": '\U00002279',
1455 "ntilde;": '\U000000F1',
1456 "ntlg;": '\U00002278',
1457 "ntriangleleft;": '\U000022EA',
1458 "ntrianglelefteq;": '\U000022EC',
1459 "ntriangleright;": '\U000022EB',
1460 "ntrianglerighteq;": '\U000022ED',
1461 "nu;": '\U000003BD',
1462 "num;": '\U00000023',
1463 "numero;": '\U00002116',
1464 "numsp;": '\U00002007',
1465 "nvDash;": '\U000022AD',
1466 "nvHarr;": '\U00002904',
1467 "nvdash;": '\U000022AC',
1468 "nvinfin;": '\U000029DE',
1469 "nvlArr;": '\U00002902',
1470 "nvrArr;": '\U00002903',
1471 "nwArr;": '\U000021D6',
1472 "nwarhk;": '\U00002923',
1473 "nwarr;": '\U00002196',
1474 "nwarrow;": '\U00002196',
1475 "nwnear;": '\U00002927',
1476 "oS;": '\U000024C8',
1477 "oacute;": '\U000000F3',
1478 "oast;": '\U0000229B',
1479 "ocir;": '\U0000229A',
1480 "ocirc;": '\U000000F4',
1481 "ocy;": '\U0000043E',
1482 "odash;": '\U0000229D',
1483 "odblac;": '\U00000151',
1484 "odiv;": '\U00002A38',
1485 "odot;": '\U00002299',
1486 "odsold;": '\U000029BC',
1487 "oelig;": '\U00000153',
1488 "ofcir;": '\U000029BF',
1489 "ofr;": '\U0001D52C',
1490 "ogon;": '\U000002DB',
1491 "ograve;": '\U000000F2',
1492 "ogt;": '\U000029C1',
1493 "ohbar;": '\U000029B5',
1494 "ohm;": '\U000003A9',
1495 "oint;": '\U0000222E',
1496 "olarr;": '\U000021BA',
1497 "olcir;": '\U000029BE',
1498 "olcross;": '\U000029BB',
1499 "oline;": '\U0000203E',
1500 "olt;": '\U000029C0',
1501 "omacr;": '\U0000014D',
1502 "omega;": '\U000003C9',
1503 "omicron;": '\U000003BF',
1504 "omid;": '\U000029B6',
1505 "ominus;": '\U00002296',
1506 "oopf;": '\U0001D560',
1507 "opar;": '\U000029B7',
1508 "operp;": '\U000029B9',
1509 "oplus;": '\U00002295',
1510 "or;": '\U00002228',
1511 "orarr;": '\U000021BB',
1512 "ord;": '\U00002A5D',
1513 "order;": '\U00002134',
1514 "orderof;": '\U00002134',
1515 "ordf;": '\U000000AA',
1516 "ordm;": '\U000000BA',
1517 "origof;": '\U000022B6',
1518 "oror;": '\U00002A56',
1519 "orslope;": '\U00002A57',
1520 "orv;": '\U00002A5B',
1521 "oscr;": '\U00002134',
1522 "oslash;": '\U000000F8',
1523 "osol;": '\U00002298',
1524 "otilde;": '\U000000F5',
1525 "otimes;": '\U00002297',
1526 "otimesas;": '\U00002A36',
1527 "ouml;": '\U000000F6',
1528 "ovbar;": '\U0000233D',
1529 "par;": '\U00002225',
1530 "para;": '\U000000B6',
1531 "parallel;": '\U00002225',
1532 "parsim;": '\U00002AF3',
1533 "parsl;": '\U00002AFD',
1534 "part;": '\U00002202',
1535 "pcy;": '\U0000043F',
1536 "percnt;": '\U00000025',
1537 "period;": '\U0000002E',
1538 "permil;": '\U00002030',
1539 "perp;": '\U000022A5',
1540 "pertenk;": '\U00002031',
1541 "pfr;": '\U0001D52D',
1542 "phi;": '\U000003C6',
1543 "phiv;": '\U000003D5',
1544 "phmmat;": '\U00002133',
1545 "phone;": '\U0000260E',
1546 "pi;": '\U000003C0',
1547 "pitchfork;": '\U000022D4',
1548 "piv;": '\U000003D6',
1549 "planck;": '\U0000210F',
1550 "planckh;": '\U0000210E',
1551 "plankv;": '\U0000210F',
1552 "plus;": '\U0000002B',
1553 "plusacir;": '\U00002A23',
1554 "plusb;": '\U0000229E',
1555 "pluscir;": '\U00002A22',
1556 "plusdo;": '\U00002214',
1557 "plusdu;": '\U00002A25',
1558 "pluse;": '\U00002A72',
1559 "plusmn;": '\U000000B1',
1560 "plussim;": '\U00002A26',
1561 "plustwo;": '\U00002A27',
1562 "pm;": '\U000000B1',
1563 "pointint;": '\U00002A15',
1564 "popf;": '\U0001D561',
1565 "pound;": '\U000000A3',
1566 "pr;": '\U0000227A',
1567 "prE;": '\U00002AB3',
1568 "prap;": '\U00002AB7',
1569 "prcue;": '\U0000227C',
1570 "pre;": '\U00002AAF',
1571 "prec;": '\U0000227A',
1572 "precapprox;": '\U00002AB7',
1573 "preccurlyeq;": '\U0000227C',
1574 "preceq;": '\U00002AAF',
1575 "precnapprox;": '\U00002AB9',
1576 "precneqq;": '\U00002AB5',
1577 "precnsim;": '\U000022E8',
1578 "precsim;": '\U0000227E',
1579 "prime;": '\U00002032',
1580 "primes;": '\U00002119',
1581 "prnE;": '\U00002AB5',
1582 "prnap;": '\U00002AB9',
1583 "prnsim;": '\U000022E8',
1584 "prod;": '\U0000220F',
1585 "profalar;": '\U0000232E',
1586 "profline;": '\U00002312',
1587 "profsurf;": '\U00002313',
1588 "prop;": '\U0000221D',
1589 "propto;": '\U0000221D',
1590 "prsim;": '\U0000227E',
1591 "prurel;": '\U000022B0',
1592 "pscr;": '\U0001D4C5',
1593 "psi;": '\U000003C8',
1594 "puncsp;": '\U00002008',
1595 "qfr;": '\U0001D52E',
1596 "qint;": '\U00002A0C',
1597 "qopf;": '\U0001D562',
1598 "qprime;": '\U00002057',
1599 "qscr;": '\U0001D4C6',
1600 "quaternions;": '\U0000210D',
1601 "quatint;": '\U00002A16',
1602 "quest;": '\U0000003F',
1603 "questeq;": '\U0000225F',
1604 "quot;": '\U00000022',
1605 "rAarr;": '\U000021DB',
1606 "rArr;": '\U000021D2',
1607 "rAtail;": '\U0000291C',
1608 "rBarr;": '\U0000290F',
1609 "rHar;": '\U00002964',
1610 "racute;": '\U00000155',
1611 "radic;": '\U0000221A',
1612 "raemptyv;": '\U000029B3',
1613 "rang;": '\U000027E9',
1614 "rangd;": '\U00002992',
1615 "range;": '\U000029A5',
1616 "rangle;": '\U000027E9',
1617 "raquo;": '\U000000BB',
1618 "rarr;": '\U00002192',
1619 "rarrap;": '\U00002975',
1620 "rarrb;": '\U000021E5',
1621 "rarrbfs;": '\U00002920',
1622 "rarrc;": '\U00002933',
1623 "rarrfs;": '\U0000291E',
1624 "rarrhk;": '\U000021AA',
1625 "rarrlp;": '\U000021AC',
1626 "rarrpl;": '\U00002945',
1627 "rarrsim;": '\U00002974',
1628 "rarrtl;": '\U000021A3',
1629 "rarrw;": '\U0000219D',
1630 "ratail;": '\U0000291A',
1631 "ratio;": '\U00002236',
1632 "rationals;": '\U0000211A',
1633 "rbarr;": '\U0000290D',
1634 "rbbrk;": '\U00002773',
1635 "rbrace;": '\U0000007D',
1636 "rbrack;": '\U0000005D',
1637 "rbrke;": '\U0000298C',
1638 "rbrksld;": '\U0000298E',
1639 "rbrkslu;": '\U00002990',
1640 "rcaron;": '\U00000159',
1641 "rcedil;": '\U00000157',
1642 "rceil;": '\U00002309',
1643 "rcub;": '\U0000007D',
1644 "rcy;": '\U00000440',
1645 "rdca;": '\U00002937',
1646 "rdldhar;": '\U00002969',
1647 "rdquo;": '\U0000201D',
1648 "rdquor;": '\U0000201D',
1649 "rdsh;": '\U000021B3',
1650 "real;": '\U0000211C',
1651 "realine;": '\U0000211B',
1652 "realpart;": '\U0000211C',
1653 "reals;": '\U0000211D',
1654 "rect;": '\U000025AD',
1655 "reg;": '\U000000AE',
1656 "rfisht;": '\U0000297D',
1657 "rfloor;": '\U0000230B',
1658 "rfr;": '\U0001D52F',
1659 "rhard;": '\U000021C1',
1660 "rharu;": '\U000021C0',
1661 "rharul;": '\U0000296C',
1662 "rho;": '\U000003C1',
1663 "rhov;": '\U000003F1',
1664 "rightarrow;": '\U00002192',
1665 "rightarrowtail;": '\U000021A3',
1666 "rightharpoondown;": '\U000021C1',
1667 "rightharpoonup;": '\U000021C0',
1668 "rightleftarrows;": '\U000021C4',
1669 "rightleftharpoons;": '\U000021CC',
1670 "rightrightarrows;": '\U000021C9',
1671 "rightsquigarrow;": '\U0000219D',
1672 "rightthreetimes;": '\U000022CC',
1673 "ring;": '\U000002DA',
1674 "risingdotseq;": '\U00002253',
1675 "rlarr;": '\U000021C4',
1676 "rlhar;": '\U000021CC',
1677 "rlm;": '\U0000200F',
1678 "rmoust;": '\U000023B1',
1679 "rmoustache;": '\U000023B1',
1680 "rnmid;": '\U00002AEE',
1681 "roang;": '\U000027ED',
1682 "roarr;": '\U000021FE',
1683 "robrk;": '\U000027E7',
1684 "ropar;": '\U00002986',
1685 "ropf;": '\U0001D563',
1686 "roplus;": '\U00002A2E',
1687 "rotimes;": '\U00002A35',
1688 "rpar;": '\U00000029',
1689 "rpargt;": '\U00002994',
1690 "rppolint;": '\U00002A12',
1691 "rrarr;": '\U000021C9',
1692 "rsaquo;": '\U0000203A',
1693 "rscr;": '\U0001D4C7',
1694 "rsh;": '\U000021B1',
1695 "rsqb;": '\U0000005D',
1696 "rsquo;": '\U00002019',
1697 "rsquor;": '\U00002019',
1698 "rthree;": '\U000022CC',
1699 "rtimes;": '\U000022CA',
1700 "rtri;": '\U000025B9',
1701 "rtrie;": '\U000022B5',
1702 "rtrif;": '\U000025B8',
1703 "rtriltri;": '\U000029CE',
1704 "ruluhar;": '\U00002968',
1705 "rx;": '\U0000211E',
1706 "sacute;": '\U0000015B',
1707 "sbquo;": '\U0000201A',
1708 "sc;": '\U0000227B',
1709 "scE;": '\U00002AB4',
1710 "scap;": '\U00002AB8',
1711 "scaron;": '\U00000161',
1712 "sccue;": '\U0000227D',
1713 "sce;": '\U00002AB0',
1714 "scedil;": '\U0000015F',
1715 "scirc;": '\U0000015D',
1716 "scnE;": '\U00002AB6',
1717 "scnap;": '\U00002ABA',
1718 "scnsim;": '\U000022E9',
1719 "scpolint;": '\U00002A13',
1720 "scsim;": '\U0000227F',
1721 "scy;": '\U00000441',
1722 "sdot;": '\U000022C5',
1723 "sdotb;": '\U000022A1',
1724 "sdote;": '\U00002A66',
1725 "seArr;": '\U000021D8',
1726 "searhk;": '\U00002925',
1727 "searr;": '\U00002198',
1728 "searrow;": '\U00002198',
1729 "sect;": '\U000000A7',
1730 "semi;": '\U0000003B',
1731 "seswar;": '\U00002929',
1732 "setminus;": '\U00002216',
1733 "setmn;": '\U00002216',
1734 "sext;": '\U00002736',
1735 "sfr;": '\U0001D530',
1736 "sfrown;": '\U00002322',
1737 "sharp;": '\U0000266F',
1738 "shchcy;": '\U00000449',
1739 "shcy;": '\U00000448',
1740 "shortmid;": '\U00002223',
1741 "shortparallel;": '\U00002225',
1742 "shy;": '\U000000AD',
1743 "sigma;": '\U000003C3',
1744 "sigmaf;": '\U000003C2',
1745 "sigmav;": '\U000003C2',
1746 "sim;": '\U0000223C',
1747 "simdot;": '\U00002A6A',
1748 "sime;": '\U00002243',
1749 "simeq;": '\U00002243',
1750 "simg;": '\U00002A9E',
1751 "simgE;": '\U00002AA0',
1752 "siml;": '\U00002A9D',
1753 "simlE;": '\U00002A9F',
1754 "simne;": '\U00002246',
1755 "simplus;": '\U00002A24',
1756 "simrarr;": '\U00002972',
1757 "slarr;": '\U00002190',
1758 "smallsetminus;": '\U00002216',
1759 "smashp;": '\U00002A33',
1760 "smeparsl;": '\U000029E4',
1761 "smid;": '\U00002223',
1762 "smile;": '\U00002323',
1763 "smt;": '\U00002AAA',
1764 "smte;": '\U00002AAC',
1765 "softcy;": '\U0000044C',
1766 "sol;": '\U0000002F',
1767 "solb;": '\U000029C4',
1768 "solbar;": '\U0000233F',
1769 "sopf;": '\U0001D564',
1770 "spades;": '\U00002660',
1771 "spadesuit;": '\U00002660',
1772 "spar;": '\U00002225',
1773 "sqcap;": '\U00002293',
1774 "sqcup;": '\U00002294',
1775 "sqsub;": '\U0000228F',
1776 "sqsube;": '\U00002291',
1777 "sqsubset;": '\U0000228F',
1778 "sqsubseteq;": '\U00002291',
1779 "sqsup;": '\U00002290',
1780 "sqsupe;": '\U00002292',
1781 "sqsupset;": '\U00002290',
1782 "sqsupseteq;": '\U00002292',
1783 "squ;": '\U000025A1',
1784 "square;": '\U000025A1',
1785 "squarf;": '\U000025AA',
1786 "squf;": '\U000025AA',
1787 "srarr;": '\U00002192',
1788 "sscr;": '\U0001D4C8',
1789 "ssetmn;": '\U00002216',
1790 "ssmile;": '\U00002323',
1791 "sstarf;": '\U000022C6',
1792 "star;": '\U00002606',
1793 "starf;": '\U00002605',
1794 "straightepsilon;": '\U000003F5',
1795 "straightphi;": '\U000003D5',
1796 "strns;": '\U000000AF',
1797 "sub;": '\U00002282',
1798 "subE;": '\U00002AC5',
1799 "subdot;": '\U00002ABD',
1800 "sube;": '\U00002286',
1801 "subedot;": '\U00002AC3',
1802 "submult;": '\U00002AC1',
1803 "subnE;": '\U00002ACB',
1804 "subne;": '\U0000228A',
1805 "subplus;": '\U00002ABF',
1806 "subrarr;": '\U00002979',
1807 "subset;": '\U00002282',
1808 "subseteq;": '\U00002286',
1809 "subseteqq;": '\U00002AC5',
1810 "subsetneq;": '\U0000228A',
1811 "subsetneqq;": '\U00002ACB',
1812 "subsim;": '\U00002AC7',
1813 "subsub;": '\U00002AD5',
1814 "subsup;": '\U00002AD3',
1815 "succ;": '\U0000227B',
1816 "succapprox;": '\U00002AB8',
1817 "succcurlyeq;": '\U0000227D',
1818 "succeq;": '\U00002AB0',
1819 "succnapprox;": '\U00002ABA',
1820 "succneqq;": '\U00002AB6',
1821 "succnsim;": '\U000022E9',
1822 "succsim;": '\U0000227F',
1823 "sum;": '\U00002211',
1824 "sung;": '\U0000266A',
1825 "sup;": '\U00002283',
1826 "sup1;": '\U000000B9',
1827 "sup2;": '\U000000B2',
1828 "sup3;": '\U000000B3',
1829 "supE;": '\U00002AC6',
1830 "supdot;": '\U00002ABE',
1831 "supdsub;": '\U00002AD8',
1832 "supe;": '\U00002287',
1833 "supedot;": '\U00002AC4',
1834 "suphsol;": '\U000027C9',
1835 "suphsub;": '\U00002AD7',
1836 "suplarr;": '\U0000297B',
1837 "supmult;": '\U00002AC2',
1838 "supnE;": '\U00002ACC',
1839 "supne;": '\U0000228B',
1840 "supplus;": '\U00002AC0',
1841 "supset;": '\U00002283',
1842 "supseteq;": '\U00002287',
1843 "supseteqq;": '\U00002AC6',
1844 "supsetneq;": '\U0000228B',
1845 "supsetneqq;": '\U00002ACC',
1846 "supsim;": '\U00002AC8',
1847 "supsub;": '\U00002AD4',
1848 "supsup;": '\U00002AD6',
1849 "swArr;": '\U000021D9',
1850 "swarhk;": '\U00002926',
1851 "swarr;": '\U00002199',
1852 "swarrow;": '\U00002199',
1853 "swnwar;": '\U0000292A',
1854 "szlig;": '\U000000DF',
1855 "target;": '\U00002316',
1856 "tau;": '\U000003C4',
1857 "tbrk;": '\U000023B4',
1858 "tcaron;": '\U00000165',
1859 "tcedil;": '\U00000163',
1860 "tcy;": '\U00000442',
1861 "tdot;": '\U000020DB',
1862 "telrec;": '\U00002315',
1863 "tfr;": '\U0001D531',
1864 "there4;": '\U00002234',
1865 "therefore;": '\U00002234',
1866 "theta;": '\U000003B8',
1867 "thetasym;": '\U000003D1',
1868 "thetav;": '\U000003D1',
1869 "thickapprox;": '\U00002248',
1870 "thicksim;": '\U0000223C',
1871 "thinsp;": '\U00002009',
1872 "thkap;": '\U00002248',
1873 "thksim;": '\U0000223C',
1874 "thorn;": '\U000000FE',
1875 "tilde;": '\U000002DC',
1876 "times;": '\U000000D7',
1877 "timesb;": '\U000022A0',
1878 "timesbar;": '\U00002A31',
1879 "timesd;": '\U00002A30',
1880 "tint;": '\U0000222D',
1881 "toea;": '\U00002928',
1882 "top;": '\U000022A4',
1883 "topbot;": '\U00002336',
1884 "topcir;": '\U00002AF1',
1885 "topf;": '\U0001D565',
1886 "topfork;": '\U00002ADA',
1887 "tosa;": '\U00002929',
1888 "tprime;": '\U00002034',
1889 "trade;": '\U00002122',
1890 "triangle;": '\U000025B5',
1891 "triangledown;": '\U000025BF',
1892 "triangleleft;": '\U000025C3',
1893 "trianglelefteq;": '\U000022B4',
1894 "triangleq;": '\U0000225C',
1895 "triangleright;": '\U000025B9',
1896 "trianglerighteq;": '\U000022B5',
1897 "tridot;": '\U000025EC',
1898 "trie;": '\U0000225C',
1899 "triminus;": '\U00002A3A',
1900 "triplus;": '\U00002A39',
1901 "trisb;": '\U000029CD',
1902 "tritime;": '\U00002A3B',
1903 "trpezium;": '\U000023E2',
1904 "tscr;": '\U0001D4C9',
1905 "tscy;": '\U00000446',
1906 "tshcy;": '\U0000045B',
1907 "tstrok;": '\U00000167',
1908 "twixt;": '\U0000226C',
1909 "twoheadleftarrow;": '\U0000219E',
1910 "twoheadrightarrow;": '\U000021A0',
1911 "uArr;": '\U000021D1',
1912 "uHar;": '\U00002963',
1913 "uacute;": '\U000000FA',
1914 "uarr;": '\U00002191',
1915 "ubrcy;": '\U0000045E',
1916 "ubreve;": '\U0000016D',
1917 "ucirc;": '\U000000FB',
1918 "ucy;": '\U00000443',
1919 "udarr;": '\U000021C5',
1920 "udblac;": '\U00000171',
1921 "udhar;": '\U0000296E',
1922 "ufisht;": '\U0000297E',
1923 "ufr;": '\U0001D532',
1924 "ugrave;": '\U000000F9',
1925 "uharl;": '\U000021BF',
1926 "uharr;": '\U000021BE',
1927 "uhblk;": '\U00002580',
1928 "ulcorn;": '\U0000231C',
1929 "ulcorner;": '\U0000231C',
1930 "ulcrop;": '\U0000230F',
1931 "ultri;": '\U000025F8',
1932 "umacr;": '\U0000016B',
1933 "uml;": '\U000000A8',
1934 "uogon;": '\U00000173',
1935 "uopf;": '\U0001D566',
1936 "uparrow;": '\U00002191',
1937 "updownarrow;": '\U00002195',
1938 "upharpoonleft;": '\U000021BF',
1939 "upharpoonright;": '\U000021BE',
1940 "uplus;": '\U0000228E',
1941 "upsi;": '\U000003C5',
1942 "upsih;": '\U000003D2',
1943 "upsilon;": '\U000003C5',
1944 "upuparrows;": '\U000021C8',
1945 "urcorn;": '\U0000231D',
1946 "urcorner;": '\U0000231D',
1947 "urcrop;": '\U0000230E',
1948 "uring;": '\U0000016F',
1949 "urtri;": '\U000025F9',
1950 "uscr;": '\U0001D4CA',
1951 "utdot;": '\U000022F0',
1952 "utilde;": '\U00000169',
1953 "utri;": '\U000025B5',
1954 "utrif;": '\U000025B4',
1955 "uuarr;": '\U000021C8',
1956 "uuml;": '\U000000FC',
1957 "uwangle;": '\U000029A7',
1958 "vArr;": '\U000021D5',
1959 "vBar;": '\U00002AE8',
1960 "vBarv;": '\U00002AE9',
1961 "vDash;": '\U000022A8',
1962 "vangrt;": '\U0000299C',
1963 "varepsilon;": '\U000003F5',
1964 "varkappa;": '\U000003F0',
1965 "varnothing;": '\U00002205',
1966 "varphi;": '\U000003D5',
1967 "varpi;": '\U000003D6',
1968 "varpropto;": '\U0000221D',
1969 "varr;": '\U00002195',
1970 "varrho;": '\U000003F1',
1971 "varsigma;": '\U000003C2',
1972 "vartheta;": '\U000003D1',
1973 "vartriangleleft;": '\U000022B2',
1974 "vartriangleright;": '\U000022B3',
1975 "vcy;": '\U00000432',
1976 "vdash;": '\U000022A2',
1977 "vee;": '\U00002228',
1978 "veebar;": '\U000022BB',
1979 "veeeq;": '\U0000225A',
1980 "vellip;": '\U000022EE',
1981 "verbar;": '\U0000007C',
1982 "vert;": '\U0000007C',
1983 "vfr;": '\U0001D533',
1984 "vltri;": '\U000022B2',
1985 "vopf;": '\U0001D567',
1986 "vprop;": '\U0000221D',
1987 "vrtri;": '\U000022B3',
1988 "vscr;": '\U0001D4CB',
1989 "vzigzag;": '\U0000299A',
1990 "wcirc;": '\U00000175',
1991 "wedbar;": '\U00002A5F',
1992 "wedge;": '\U00002227',
1993 "wedgeq;": '\U00002259',
1994 "weierp;": '\U00002118',
1995 "wfr;": '\U0001D534',
1996 "wopf;": '\U0001D568',
1997 "wp;": '\U00002118',
1998 "wr;": '\U00002240',
1999 "wreath;": '\U00002240',
2000 "wscr;": '\U0001D4CC',
2001 "xcap;": '\U000022C2',
2002 "xcirc;": '\U000025EF',
2003 "xcup;": '\U000022C3',
2004 "xdtri;": '\U000025BD',
2005 "xfr;": '\U0001D535',
2006 "xhArr;": '\U000027FA',
2007 "xharr;": '\U000027F7',
2008 "xi;": '\U000003BE',
2009 "xlArr;": '\U000027F8',
2010 "xlarr;": '\U000027F5',
2011 "xmap;": '\U000027FC',
2012 "xnis;": '\U000022FB',
2013 "xodot;": '\U00002A00',
2014 "xopf;": '\U0001D569',
2015 "xoplus;": '\U00002A01',
2016 "xotime;": '\U00002A02',
2017 "xrArr;": '\U000027F9',
2018 "xrarr;": '\U000027F6',
2019 "xscr;": '\U0001D4CD',
2020 "xsqcup;": '\U00002A06',
2021 "xuplus;": '\U00002A04',
2022 "xutri;": '\U000025B3',
2023 "xvee;": '\U000022C1',
2024 "xwedge;": '\U000022C0',
2025 "yacute;": '\U000000FD',
2026 "yacy;": '\U0000044F',
2027 "ycirc;": '\U00000177',
2028 "ycy;": '\U0000044B',
2029 "yen;": '\U000000A5',
2030 "yfr;": '\U0001D536',
2031 "yicy;": '\U00000457',
2032 "yopf;": '\U0001D56A',
2033 "yscr;": '\U0001D4CE',
2034 "yucy;": '\U0000044E',
2035 "yuml;": '\U000000FF',
2036 "zacute;": '\U0000017A',
2037 "zcaron;": '\U0000017E',
2038 "zcy;": '\U00000437',
2039 "zdot;": '\U0000017C',
2040 "zeetrf;": '\U00002128',
2041 "zeta;": '\U000003B6',
2042 "zfr;": '\U0001D537',
2043 "zhcy;": '\U00000436',
2044 "zigrarr;": '\U000021DD',
2045 "zopf;": '\U0001D56B',
2046 "zscr;": '\U0001D4CF',
2047 "zwj;": '\U0000200D',
2048 "zwnj;": '\U0000200C',
2049 "AElig": '\U000000C6',
2050 "AMP": '\U00000026',
2051 "Aacute": '\U000000C1',
2052 "Acirc": '\U000000C2',
2053 "Agrave": '\U000000C0',
2054 "Aring": '\U000000C5',
2055 "Atilde": '\U000000C3',
2056 "Auml": '\U000000C4',
2057 "COPY": '\U000000A9',
2058 "Ccedil": '\U000000C7',
2059 "ETH": '\U000000D0',
2060 "Eacute": '\U000000C9',
2061 "Ecirc": '\U000000CA',
2062 "Egrave": '\U000000C8',
2063 "Euml": '\U000000CB',
2064 "GT": '\U0000003E',
2065 "Iacute": '\U000000CD',
2066 "Icirc": '\U000000CE',
2067 "Igrave": '\U000000CC',
2068 "Iuml": '\U000000CF',
2069 "LT": '\U0000003C',
2070 "Ntilde": '\U000000D1',
2071 "Oacute": '\U000000D3',
2072 "Ocirc": '\U000000D4',
2073 "Ograve": '\U000000D2',
2074 "Oslash": '\U000000D8',
2075 "Otilde": '\U000000D5',
2076 "Ouml": '\U000000D6',
2077 "QUOT": '\U00000022',
2078 "REG": '\U000000AE',
2079 "THORN": '\U000000DE',
2080 "Uacute": '\U000000DA',
2081 "Ucirc": '\U000000DB',
2082 "Ugrave": '\U000000D9',
2083 "Uuml": '\U000000DC',
2084 "Yacute": '\U000000DD',
2085 "aacute": '\U000000E1',
2086 "acirc": '\U000000E2',
2087 "acute": '\U000000B4',
2088 "aelig": '\U000000E6',
2089 "agrave": '\U000000E0',
2090 "amp": '\U00000026',
2091 "aring": '\U000000E5',
2092 "atilde": '\U000000E3',
2093 "auml": '\U000000E4',
2094 "brvbar": '\U000000A6',
2095 "ccedil": '\U000000E7',
2096 "cedil": '\U000000B8',
2097 "cent": '\U000000A2',
2098 "copy": '\U000000A9',
2099 "curren": '\U000000A4',
2100 "deg": '\U000000B0',
2101 "divide": '\U000000F7',
2102 "eacute": '\U000000E9',
2103 "ecirc": '\U000000EA',
2104 "egrave": '\U000000E8',
2105 "eth": '\U000000F0',
2106 "euml": '\U000000EB',
2107 "frac12": '\U000000BD',
2108 "frac14": '\U000000BC',
2109 "frac34": '\U000000BE',
2110 "gt": '\U0000003E',
2111 "iacute": '\U000000ED',
2112 "icirc": '\U000000EE',
2113 "iexcl": '\U000000A1',
2114 "igrave": '\U000000EC',
2115 "iquest": '\U000000BF',
2116 "iuml": '\U000000EF',
2117 "laquo": '\U000000AB',
2118 "lt": '\U0000003C',
2119 "macr": '\U000000AF',
2120 "micro": '\U000000B5',
2121 "middot": '\U000000B7',
2122 "nbsp": '\U000000A0',
2123 "not": '\U000000AC',
2124 "ntilde": '\U000000F1',
2125 "oacute": '\U000000F3',
2126 "ocirc": '\U000000F4',
2127 "ograve": '\U000000F2',
2128 "ordf": '\U000000AA',
2129 "ordm": '\U000000BA',
2130 "oslash": '\U000000F8',
2131 "otilde": '\U000000F5',
2132 "ouml": '\U000000F6',
2133 "para": '\U000000B6',
2134 "plusmn": '\U000000B1',
2135 "pound": '\U000000A3',
2136 "quot": '\U00000022',
2137 "raquo": '\U000000BB',
2138 "reg": '\U000000AE',
2139 "sect": '\U000000A7',
2140 "shy": '\U000000AD',
2141 "sup1": '\U000000B9',
2142 "sup2": '\U000000B2',
2143 "sup3": '\U000000B3',
2144 "szlig": '\U000000DF',
2145 "thorn": '\U000000FE',
2146 "times": '\U000000D7',
2147 "uacute": '\U000000FA',
2148 "ucirc": '\U000000FB',
2149 "ugrave": '\U000000F9',
2150 "uml": '\U000000A8',
2151 "uuml": '\U000000FC',
2152 "yacute": '\U000000FD',
2153 "yen": '\U000000A5',
2154 "yuml": '\U000000FF',
2155}
2156
// HTML entities that are two unicode codepoints.
//
// entity2 is keyed by the entity name without its leading "&" (the same form
// unescapeEntity looks up), and each value is the pair of code points that
// replaces the entity, in output order.
var entity2 = map[string][2]rune{
	// TODO(nigeltao): Handle replacements that are wider than their names.
	// The two entries below stay disabled because their UTF-8 replacement
	// (3 + 3 bytes) is longer than the 5-byte reference ("&nLt;"), which the
	// in-place unescape (dst <= src) cannot accommodate.
	// "nLt;":                     {'\u226A', '\u20D2'},
	// "nGt;":                     {'\u226B', '\u20D2'},
	"NotEqualTilde;":           {'\u2242', '\u0338'},
	"NotGreaterFullEqual;":     {'\u2267', '\u0338'},
	"NotGreaterGreater;":       {'\u226B', '\u0338'},
	"NotGreaterSlantEqual;":    {'\u2A7E', '\u0338'},
	"NotHumpDownHump;":         {'\u224E', '\u0338'},
	"NotHumpEqual;":            {'\u224F', '\u0338'},
	"NotLeftTriangleBar;":      {'\u29CF', '\u0338'},
	"NotLessLess;":             {'\u226A', '\u0338'},
	"NotLessSlantEqual;":       {'\u2A7D', '\u0338'},
	"NotNestedGreaterGreater;": {'\u2AA2', '\u0338'},
	"NotNestedLessLess;":       {'\u2AA1', '\u0338'},
	"NotPrecedesEqual;":        {'\u2AAF', '\u0338'},
	"NotRightTriangleBar;":     {'\u29D0', '\u0338'},
	"NotSquareSubset;":         {'\u228F', '\u0338'},
	"NotSquareSuperset;":       {'\u2290', '\u0338'},
	"NotSubset;":               {'\u2282', '\u20D2'},
	"NotSucceedsEqual;":        {'\u2AB0', '\u0338'},
	"NotSucceedsTilde;":        {'\u227F', '\u0338'},
	"NotSuperset;":             {'\u2283', '\u20D2'},
	"ThickSpace;":              {'\u205F', '\u200A'},
	"acE;":                     {'\u223E', '\u0333'},
	"bne;":                     {'\u003D', '\u20E5'},
	"bnequiv;":                 {'\u2261', '\u20E5'},
	"caps;":                    {'\u2229', '\uFE00'},
	"cups;":                    {'\u222A', '\uFE00'},
	"fjlig;":                   {'\u0066', '\u006A'},
	"gesl;":                    {'\u22DB', '\uFE00'},
	"gvertneqq;":               {'\u2269', '\uFE00'},
	"gvnE;":                    {'\u2269', '\uFE00'},
	"lates;":                   {'\u2AAD', '\uFE00'},
	"lesg;":                    {'\u22DA', '\uFE00'},
	"lvertneqq;":               {'\u2268', '\uFE00'},
	"lvnE;":                    {'\u2268', '\uFE00'},
	"nGg;":                     {'\u22D9', '\u0338'},
	"nGtv;":                    {'\u226B', '\u0338'},
	"nLl;":                     {'\u22D8', '\u0338'},
	"nLtv;":                    {'\u226A', '\u0338'},
	"nang;":                    {'\u2220', '\u20D2'},
	"napE;":                    {'\u2A70', '\u0338'},
	"napid;":                   {'\u224B', '\u0338'},
	"nbump;":                   {'\u224E', '\u0338'},
	"nbumpe;":                  {'\u224F', '\u0338'},
	"ncongdot;":                {'\u2A6D', '\u0338'},
	"nedot;":                   {'\u2250', '\u0338'},
	"nesim;":                   {'\u2242', '\u0338'},
	"ngE;":                     {'\u2267', '\u0338'},
	"ngeqq;":                   {'\u2267', '\u0338'},
	"ngeqslant;":               {'\u2A7E', '\u0338'},
	"nges;":                    {'\u2A7E', '\u0338'},
	"nlE;":                     {'\u2266', '\u0338'},
	"nleqq;":                   {'\u2266', '\u0338'},
	"nleqslant;":               {'\u2A7D', '\u0338'},
	"nles;":                    {'\u2A7D', '\u0338'},
	"notinE;":                  {'\u22F9', '\u0338'},
	"notindot;":                {'\u22F5', '\u0338'},
	"nparsl;":                  {'\u2AFD', '\u20E5'},
	"npart;":                   {'\u2202', '\u0338'},
	"npre;":                    {'\u2AAF', '\u0338'},
	"npreceq;":                 {'\u2AAF', '\u0338'},
	"nrarrc;":                  {'\u2933', '\u0338'},
	"nrarrw;":                  {'\u219D', '\u0338'},
	"nsce;":                    {'\u2AB0', '\u0338'},
	"nsubE;":                   {'\u2AC5', '\u0338'},
	"nsubset;":                 {'\u2282', '\u20D2'},
	"nsubseteqq;":              {'\u2AC5', '\u0338'},
	"nsucceq;":                 {'\u2AB0', '\u0338'},
	"nsupE;":                   {'\u2AC6', '\u0338'},
	"nsupset;":                 {'\u2283', '\u20D2'},
	"nsupseteqq;":              {'\u2AC6', '\u0338'},
	"nvap;":                    {'\u224D', '\u20D2'},
	"nvge;":                    {'\u2265', '\u20D2'},
	"nvgt;":                    {'\u003E', '\u20D2'},
	"nvle;":                    {'\u2264', '\u20D2'},
	"nvlt;":                    {'\u003C', '\u20D2'},
	"nvltrie;":                 {'\u22B4', '\u20D2'},
	"nvrtrie;":                 {'\u22B5', '\u20D2'},
	"nvsim;":                   {'\u223C', '\u20D2'},
	"race;":                    {'\u223D', '\u0331'},
	"smtes;":                   {'\u2AAC', '\uFE00'},
	"sqcaps;":                  {'\u2293', '\uFE00'},
	"sqcups;":                  {'\u2294', '\uFE00'},
	"varsubsetneq;":            {'\u228A', '\uFE00'},
	"varsubsetneqq;":           {'\u2ACB', '\uFE00'},
	"varsupsetneq;":            {'\u228B', '\uFE00'},
	"varsupsetneqq;":           {'\u2ACC', '\uFE00'},
	"vnsub;":                   {'\u2282', '\u20D2'},
	"vnsup;":                   {'\u2283', '\u20D2'},
	"vsubnE;":                  {'\u2ACB', '\uFE00'},
	"vsubne;":                  {'\u228A', '\uFE00'},
	"vsupnE;":                  {'\u2ACC', '\uFE00'},
	"vsupne;":                  {'\u228B', '\uFE00'},
}
diff --git a/vendor/golang.org/x/net/html/escape.go b/vendor/golang.org/x/net/html/escape.go
new file mode 100644
index 0000000..d856139
--- /dev/null
+++ b/vendor/golang.org/x/net/html/escape.go
@@ -0,0 +1,258 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bytes"
9 "strings"
10 "unicode/utf8"
11)
12
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
//
// replacementTable is indexed by the numeric reference value minus 0x80 and
// holds one entry for each of the 32 values 0x80 through 0x9F. Entries that
// equal their own index plus 0x80 (0x81, 0x8D, 0x8F, 0x90, 0x9D) are
// effectively left unchanged.
var replacementTable = [...]rune{
	'\u20AC', // First entry is what 0x80 should be replaced with.
	'\u0081',
	'\u201A',
	'\u0192',
	'\u201E',
	'\u2026',
	'\u2020',
	'\u2021',
	'\u02C6',
	'\u2030',
	'\u0160',
	'\u2039',
	'\u0152',
	'\u008D',
	'\u017D',
	'\u008F',
	'\u0090',
	'\u2018',
	'\u2019',
	'\u201C',
	'\u201D',
	'\u2022',
	'\u2013',
	'\u2014',
	'\u02DC',
	'\u2122',
	'\u0161',
	'\u203A',
	'\u0153',
	'\u009D',
	'\u017E',
	'\u0178', // Last entry is 0x9F.
	// 0x00->'\uFFFD' is handled programmatically.
	// 0x0D->'\u000D' is a no-op.
}
52
53// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
54// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
55// Precondition: b[src] == '&' && dst <= src.
56// attribute should be true if parsing an attribute value.
57func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
58 // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
59
60 // i starts at 1 because we already know that s[0] == '&'.
61 i, s := 1, b[src:]
62
63 if len(s) <= 1 {
64 b[dst] = b[src]
65 return dst + 1, src + 1
66 }
67
68 if s[i] == '#' {
69 if len(s) <= 3 { // We need to have at least "&#.".
70 b[dst] = b[src]
71 return dst + 1, src + 1
72 }
73 i++
74 c := s[i]
75 hex := false
76 if c == 'x' || c == 'X' {
77 hex = true
78 i++
79 }
80
81 x := '\x00'
82 for i < len(s) {
83 c = s[i]
84 i++
85 if hex {
86 if '0' <= c && c <= '9' {
87 x = 16*x + rune(c) - '0'
88 continue
89 } else if 'a' <= c && c <= 'f' {
90 x = 16*x + rune(c) - 'a' + 10
91 continue
92 } else if 'A' <= c && c <= 'F' {
93 x = 16*x + rune(c) - 'A' + 10
94 continue
95 }
96 } else if '0' <= c && c <= '9' {
97 x = 10*x + rune(c) - '0'
98 continue
99 }
100 if c != ';' {
101 i--
102 }
103 break
104 }
105
106 if i <= 3 { // No characters matched.
107 b[dst] = b[src]
108 return dst + 1, src + 1
109 }
110
111 if 0x80 <= x && x <= 0x9F {
112 // Replace characters from Windows-1252 with UTF-8 equivalents.
113 x = replacementTable[x-0x80]
114 } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
115 // Replace invalid characters with the replacement character.
116 x = '\uFFFD'
117 }
118
119 return dst + utf8.EncodeRune(b[dst:], x), src + i
120 }
121
122 // Consume the maximum number of characters possible, with the
123 // consumed characters matching one of the named references.
124
125 for i < len(s) {
126 c := s[i]
127 i++
128 // Lower-cased characters are more common in entities, so we check for them first.
129 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
130 continue
131 }
132 if c != ';' {
133 i--
134 }
135 break
136 }
137
138 entityName := string(s[1:i])
139 if entityName == "" {
140 // No-op.
141 } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
142 // No-op.
143 } else if x := entity[entityName]; x != 0 {
144 return dst + utf8.EncodeRune(b[dst:], x), src + i
145 } else if x := entity2[entityName]; x[0] != 0 {
146 dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
147 return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
148 } else if !attribute {
149 maxLen := len(entityName) - 1
150 if maxLen > longestEntityWithoutSemicolon {
151 maxLen = longestEntityWithoutSemicolon
152 }
153 for j := maxLen; j > 1; j-- {
154 if x := entity[entityName[:j]]; x != 0 {
155 return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
156 }
157 }
158 }
159
160 dst1, src1 = dst+i, src+i
161 copy(b[dst:dst1], b[src:src1])
162 return dst1, src1
163}
164
165// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
166// attribute should be true if parsing an attribute value.
167func unescape(b []byte, attribute bool) []byte {
168 for i, c := range b {
169 if c == '&' {
170 dst, src := unescapeEntity(b, i, i, attribute)
171 for src < len(b) {
172 c := b[src]
173 if c == '&' {
174 dst, src = unescapeEntity(b, dst, src, attribute)
175 } else {
176 b[dst] = c
177 dst, src = dst+1, src+1
178 }
179 }
180 return b[0:dst]
181 }
182 }
183 return b
184}
185
// lower converts every ASCII upper-case letter in b to lower case, in place,
// and returns b; "aBc" becomes "abc". All other bytes are left as they are.
func lower(b []byte) []byte {
	const caseDelta = 'a' - 'A'
	for i := range b {
		if ch := b[i]; ch >= 'A' && ch <= 'Z' {
			b[i] = ch + caseDelta
		}
	}
	return b
}
195
196const escapedChars = "&'<>\"\r"
197
198func escape(w writer, s string) error {
199 i := strings.IndexAny(s, escapedChars)
200 for i != -1 {
201 if _, err := w.WriteString(s[:i]); err != nil {
202 return err
203 }
204 var esc string
205 switch s[i] {
206 case '&':
207 esc = "&amp;"
208 case '\'':
209 // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
210 esc = "&#39;"
211 case '<':
212 esc = "&lt;"
213 case '>':
214 esc = "&gt;"
215 case '"':
216 // "&#34;" is shorter than "&quot;".
217 esc = "&#34;"
218 case '\r':
219 esc = "&#13;"
220 default:
221 panic("unrecognized escape character")
222 }
223 s = s[i+1:]
224 if _, err := w.WriteString(esc); err != nil {
225 return err
226 }
227 i = strings.IndexAny(s, escapedChars)
228 }
229 _, err := w.WriteString(s)
230 return err
231}
232
233// EscapeString escapes special characters like "<" to become "&lt;". It
234// escapes only five such characters: <, >, &, ' and ".
235// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
236// always true.
237func EscapeString(s string) string {
238 if strings.IndexAny(s, escapedChars) == -1 {
239 return s
240 }
241 var buf bytes.Buffer
242 escape(&buf, s)
243 return buf.String()
244}
245
246// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
247// larger range of entities than EscapeString escapes. For example, "&aacute;"
248// unescapes to "á", as does "&#225;" and "&xE1;".
249// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
250// always true.
251func UnescapeString(s string) string {
252 for _, c := range s {
253 if c == '&' {
254 return string(unescape([]byte(s), false))
255 }
256 }
257 return s
258}
diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go
new file mode 100644
index 0000000..d3b3844
--- /dev/null
+++ b/vendor/golang.org/x/net/html/foreign.go
@@ -0,0 +1,226 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "strings"
9)
10
11func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
12 for i := range aa {
13 if newName, ok := nameMap[aa[i].Key]; ok {
14 aa[i].Key = newName
15 }
16 }
17}
18
19func adjustForeignAttributes(aa []Attribute) {
20 for i, a := range aa {
21 if a.Key == "" || a.Key[0] != 'x' {
22 continue
23 }
24 switch a.Key {
25 case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
26 "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
27 j := strings.Index(a.Key, ":")
28 aa[i].Namespace = a.Key[:j]
29 aa[i].Key = a.Key[j+1:]
30 }
31 }
32}
33
34func htmlIntegrationPoint(n *Node) bool {
35 if n.Type != ElementNode {
36 return false
37 }
38 switch n.Namespace {
39 case "math":
40 if n.Data == "annotation-xml" {
41 for _, a := range n.Attr {
42 if a.Key == "encoding" {
43 val := strings.ToLower(a.Val)
44 if val == "text/html" || val == "application/xhtml+xml" {
45 return true
46 }
47 }
48 }
49 }
50 case "svg":
51 switch n.Data {
52 case "desc", "foreignObject", "title":
53 return true
54 }
55 }
56 return false
57}
58
59func mathMLTextIntegrationPoint(n *Node) bool {
60 if n.Namespace != "math" {
61 return false
62 }
63 switch n.Data {
64 case "mi", "mo", "mn", "ms", "mtext":
65 return true
66 }
67 return false
68}
69
// Section 12.2.5.5.
//
// breakout is a set of lower-case HTML tag names, stored as a map so that
// membership can be tested with breakout[name].
// NOTE(review): presumably these are the start tags that break out of
// foreign (SVG/MathML) content; the user of this table is outside this
// section, so confirm against the parser.
var breakout = map[string]bool{
	"b":          true,
	"big":        true,
	"blockquote": true,
	"body":       true,
	"br":         true,
	"center":     true,
	"code":       true,
	"dd":         true,
	"div":        true,
	"dl":         true,
	"dt":         true,
	"em":         true,
	"embed":      true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"head":       true,
	"hr":         true,
	"i":          true,
	"img":        true,
	"li":         true,
	"listing":    true,
	"menu":       true,
	"meta":       true,
	"nobr":       true,
	"ol":         true,
	"p":          true,
	"pre":        true,
	"ruby":       true,
	"s":          true,
	"small":      true,
	"span":       true,
	"strong":     true,
	"strike":     true,
	"sub":        true,
	"sup":        true,
	"table":      true,
	"tt":         true,
	"u":          true,
	"ul":         true,
	"var":        true,
}
117
// Section 12.2.5.5.
//
// svgTagNameAdjustments maps the all-lower-case spelling of an SVG tag name
// to its mixed-case spelling. Names absent from the map have no adjusted
// form recorded here.
var svgTagNameAdjustments = map[string]string{
	"altglyph":            "altGlyph",
	"altglyphdef":         "altGlyphDef",
	"altglyphitem":        "altGlyphItem",
	"animatecolor":        "animateColor",
	"animatemotion":       "animateMotion",
	"animatetransform":    "animateTransform",
	"clippath":            "clipPath",
	"feblend":             "feBlend",
	"fecolormatrix":       "feColorMatrix",
	"fecomponenttransfer": "feComponentTransfer",
	"fecomposite":         "feComposite",
	"feconvolvematrix":    "feConvolveMatrix",
	"fediffuselighting":   "feDiffuseLighting",
	"fedisplacementmap":   "feDisplacementMap",
	"fedistantlight":      "feDistantLight",
	"feflood":             "feFlood",
	"fefunca":             "feFuncA",
	"fefuncb":             "feFuncB",
	"fefuncg":             "feFuncG",
	"fefuncr":             "feFuncR",
	"fegaussianblur":      "feGaussianBlur",
	"feimage":             "feImage",
	"femerge":             "feMerge",
	"femergenode":         "feMergeNode",
	"femorphology":        "feMorphology",
	"feoffset":            "feOffset",
	"fepointlight":        "fePointLight",
	"fespecularlighting":  "feSpecularLighting",
	"fespotlight":         "feSpotLight",
	"fetile":              "feTile",
	"feturbulence":        "feTurbulence",
	"foreignobject":       "foreignObject",
	"glyphref":            "glyphRef",
	"lineargradient":      "linearGradient",
	"radialgradient":      "radialGradient",
	"textpath":            "textPath",
}
157
// Section 12.2.5.1
//
// mathMLAttributeAdjustments maps the all-lower-case spelling of a MathML
// attribute name to its mixed-case spelling. It has the shape expected by
// the nameMap parameter of adjustAttributeNames.
var mathMLAttributeAdjustments = map[string]string{
	"definitionurl": "definitionURL",
}
162
// svgAttributeAdjustments maps the all-lower-case spelling of an SVG
// attribute name to its mixed-case spelling. It has the shape expected by
// the nameMap parameter of adjustAttributeNames.
var svgAttributeAdjustments = map[string]string{
	"attributename":             "attributeName",
	"attributetype":             "attributeType",
	"basefrequency":             "baseFrequency",
	"baseprofile":               "baseProfile",
	"calcmode":                  "calcMode",
	"clippathunits":             "clipPathUnits",
	"contentscripttype":         "contentScriptType",
	"contentstyletype":          "contentStyleType",
	"diffuseconstant":           "diffuseConstant",
	"edgemode":                  "edgeMode",
	"externalresourcesrequired": "externalResourcesRequired",
	"filterres":                 "filterRes",
	"filterunits":               "filterUnits",
	"glyphref":                  "glyphRef",
	"gradienttransform":         "gradientTransform",
	"gradientunits":             "gradientUnits",
	"kernelmatrix":              "kernelMatrix",
	"kernelunitlength":          "kernelUnitLength",
	"keypoints":                 "keyPoints",
	"keysplines":                "keySplines",
	"keytimes":                  "keyTimes",
	"lengthadjust":              "lengthAdjust",
	"limitingconeangle":         "limitingConeAngle",
	"markerheight":              "markerHeight",
	"markerunits":               "markerUnits",
	"markerwidth":               "markerWidth",
	"maskcontentunits":          "maskContentUnits",
	"maskunits":                 "maskUnits",
	"numoctaves":                "numOctaves",
	"pathlength":                "pathLength",
	"patterncontentunits":       "patternContentUnits",
	"patterntransform":          "patternTransform",
	"patternunits":              "patternUnits",
	"pointsatx":                 "pointsAtX",
	"pointsaty":                 "pointsAtY",
	"pointsatz":                 "pointsAtZ",
	"preservealpha":             "preserveAlpha",
	"preserveaspectratio":       "preserveAspectRatio",
	"primitiveunits":            "primitiveUnits",
	"refx":                      "refX",
	"refy":                      "refY",
	"repeatcount":               "repeatCount",
	"repeatdur":                 "repeatDur",
	"requiredextensions":        "requiredExtensions",
	"requiredfeatures":          "requiredFeatures",
	"specularconstant":          "specularConstant",
	"specularexponent":          "specularExponent",
	"spreadmethod":              "spreadMethod",
	"startoffset":               "startOffset",
	"stddeviation":              "stdDeviation",
	"stitchtiles":               "stitchTiles",
	"surfacescale":              "surfaceScale",
	"systemlanguage":            "systemLanguage",
	"tablevalues":               "tableValues",
	"targetx":                   "targetX",
	"targety":                   "targetY",
	"textlength":                "textLength",
	"viewbox":                   "viewBox",
	"viewtarget":                "viewTarget",
	"xchannelselector":          "xChannelSelector",
	"ychannelselector":          "yChannelSelector",
	"zoomandpan":                "zoomAndPan",
}
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
new file mode 100644
index 0000000..26b657a
--- /dev/null
+++ b/vendor/golang.org/x/net/html/node.go
@@ -0,0 +1,193 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "golang.org/x/net/html/atom"
9)
10
// A NodeType is the type of a Node.
type NodeType uint32

// The NodeType values. ErrorNode is first, so it is also the zero value of
// NodeType.
const (
	ErrorNode NodeType = iota
	TextNode
	DocumentNode
	ElementNode
	CommentNode
	DoctypeNode
	// scopeMarkerNode is unexported; it is the Type of the scopeMarker
	// sentinel Node.
	scopeMarkerNode
)
23
// Section 12.2.3.3 says "scope markers are inserted when entering applet
// elements, buttons, object elements, marquees, table cells, and table
// captions, and are used to prevent formatting from 'leaking'".
//
// scopeMarker is the package-level Node of type scopeMarkerNode that stands
// for such a marker; all of its other fields are left at their zero values.
var scopeMarker = Node{Type: scopeMarkerNode}
28
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
	// Tree links. A detached node has a nil Parent, PrevSibling and
	// NextSibling; InsertBefore, AppendChild and RemoveChild keep these
	// pointers consistent.
	Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node

	Type      NodeType  // Kind of node.
	DataAtom  atom.Atom // Atom for Data on element nodes, or zero if unknown.
	Data      string    // Tag name for elements, content for text; unescaped.
	Namespace string    // "" (HTML), "math" or "svg"; see above.
	Attr      []Attribute
}
47
48// InsertBefore inserts newChild as a child of n, immediately before oldChild
49// in the sequence of n's children. oldChild may be nil, in which case newChild
50// is appended to the end of n's children.
51//
52// It will panic if newChild already has a parent or siblings.
53func (n *Node) InsertBefore(newChild, oldChild *Node) {
54 if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
55 panic("html: InsertBefore called for an attached child Node")
56 }
57 var prev, next *Node
58 if oldChild != nil {
59 prev, next = oldChild.PrevSibling, oldChild
60 } else {
61 prev = n.LastChild
62 }
63 if prev != nil {
64 prev.NextSibling = newChild
65 } else {
66 n.FirstChild = newChild
67 }
68 if next != nil {
69 next.PrevSibling = newChild
70 } else {
71 n.LastChild = newChild
72 }
73 newChild.Parent = n
74 newChild.PrevSibling = prev
75 newChild.NextSibling = next
76}
77
78// AppendChild adds a node c as a child of n.
79//
80// It will panic if c already has a parent or siblings.
81func (n *Node) AppendChild(c *Node) {
82 if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
83 panic("html: AppendChild called for an attached child Node")
84 }
85 last := n.LastChild
86 if last != nil {
87 last.NextSibling = c
88 } else {
89 n.FirstChild = c
90 }
91 n.LastChild = c
92 c.Parent = n
93 c.PrevSibling = last
94}
95
96// RemoveChild removes a node c that is a child of n. Afterwards, c will have
97// no parent and no siblings.
98//
99// It will panic if c's parent is not n.
100func (n *Node) RemoveChild(c *Node) {
101 if c.Parent != n {
102 panic("html: RemoveChild called for a non-child Node")
103 }
104 if n.FirstChild == c {
105 n.FirstChild = c.NextSibling
106 }
107 if c.NextSibling != nil {
108 c.NextSibling.PrevSibling = c.PrevSibling
109 }
110 if n.LastChild == c {
111 n.LastChild = c.PrevSibling
112 }
113 if c.PrevSibling != nil {
114 c.PrevSibling.NextSibling = c.NextSibling
115 }
116 c.Parent = nil
117 c.PrevSibling = nil
118 c.NextSibling = nil
119}
120
121// reparentChildren reparents all of src's child nodes to dst.
122func reparentChildren(dst, src *Node) {
123 for {
124 child := src.FirstChild
125 if child == nil {
126 break
127 }
128 src.RemoveChild(child)
129 dst.AppendChild(child)
130 }
131}
132
133// clone returns a new node with the same type, data and attributes.
134// The clone has no parent, no siblings and no children.
135func (n *Node) clone() *Node {
136 m := &Node{
137 Type: n.Type,
138 DataAtom: n.DataAtom,
139 Data: n.Data,
140 Attr: make([]Attribute, len(n.Attr)),
141 }
142 copy(m.Attr, n.Attr)
143 return m
144}
145
// nodeStack is a stack of nodes. The last element of the slice is the top of
// the stack.
type nodeStack []*Node
148
149// pop pops the stack. It will panic if s is empty.
150func (s *nodeStack) pop() *Node {
151 i := len(*s)
152 n := (*s)[i-1]
153 *s = (*s)[:i-1]
154 return n
155}
156
157// top returns the most recently pushed node, or nil if s is empty.
158func (s *nodeStack) top() *Node {
159 if i := len(*s); i > 0 {
160 return (*s)[i-1]
161 }
162 return nil
163}
164
165// index returns the index of the top-most occurrence of n in the stack, or -1
166// if n is not present.
167func (s *nodeStack) index(n *Node) int {
168 for i := len(*s) - 1; i >= 0; i-- {
169 if (*s)[i] == n {
170 return i
171 }
172 }
173 return -1
174}
175
176// insert inserts a node at the given index.
177func (s *nodeStack) insert(i int, n *Node) {
178 (*s) = append(*s, nil)
179 copy((*s)[i+1:], (*s)[i:])
180 (*s)[i] = n
181}
182
183// remove removes a node from the stack. It is a no-op if n is not present.
184func (s *nodeStack) remove(n *Node) {
185 i := s.index(n)
186 if i == -1 {
187 return
188 }
189 copy((*s)[i:], (*s)[i+1:])
190 j := len(*s) - 1
191 (*s)[j] = nil
192 *s = (*s)[:j]
193}
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go
new file mode 100644
index 0000000..be4b2bf
--- /dev/null
+++ b/vendor/golang.org/x/net/html/parse.go
@@ -0,0 +1,2094 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "errors"
9 "fmt"
10 "io"
11 "strings"
12
13 a "golang.org/x/net/html/atom"
14)
15
16// A parser implements the HTML5 parsing algorithm:
17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18type parser struct {
19 // tokenizer provides the tokens for the parser.
20 tokenizer *Tokenizer
21 // tok is the most recently read token.
22 tok Token
23 // Self-closing tags like <hr/> are treated as start tags, except that
24 // hasSelfClosingToken is set while they are being processed.
25 hasSelfClosingToken bool
26 // doc is the document root element.
27 doc *Node
28 // The stack of open elements (section 12.2.3.2) and active formatting
29 // elements (section 12.2.3.3).
30 oe, afe nodeStack
31 // Element pointers (section 12.2.3.4).
32 head, form *Node
33 // Other parsing state flags (section 12.2.3.5).
34 scripting, framesetOK bool
35 // im is the current insertion mode.
36 im insertionMode
37 // originalIM is the insertion mode to go back to after completing a text
38 // or inTableText insertion mode.
39 originalIM insertionMode
40 // fosterParenting is whether new elements should be inserted according to
41 // the foster parenting rules (section 12.2.5.3).
42 fosterParenting bool
43 // quirks is whether the parser is operating in "quirks mode."
44 quirks bool
45 // fragment is whether the parser is parsing an HTML fragment.
46 fragment bool
47 // context is the context element when parsing an HTML fragment
48 // (section 12.4).
49 context *Node
50}
51
52func (p *parser) top() *Node {
53 if n := p.oe.top(); n != nil {
54 return n
55 }
56 return p.doc
57}
58
59// Stop tags for use in popUntil. These come from section 12.2.3.2.
60var (
61 defaultScopeStopTags = map[string][]a.Atom{
62 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
63 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
64 "svg": {a.Desc, a.ForeignObject, a.Title},
65 }
66)
67
// scope selects which set of stop tags bounds a search of the stack of open
// elements.
type scope int

// The scopes understood by the stack-searching helpers. The numeric values
// are assigned sequentially starting at zero.
const (
	// defaultScope stops only at the namespace's default stop tags.
	defaultScope scope = iota
	// listItemScope additionally stops at ol and ul.
	listItemScope
	// buttonScope additionally stops at button.
	buttonScope
	// tableScope stops at html and table.
	tableScope
	// tableRowScope is used when clearing the stack back to a row context.
	tableRowScope
	// tableBodyScope is used when clearing the stack back to a table body
	// context.
	tableBodyScope
	// selectScope stops at anything other than optgroup and option.
	selectScope
)
79
80// popUntil pops the stack of open elements at the highest element whose tag
81// is in matchTags, provided there is no higher element in the scope's stop
82// tags (as defined in section 12.2.3.2). It returns whether or not there was
83// such an element. If there was not, popUntil leaves the stack unchanged.
84//
85// For example, the set of stop tags for table scope is: "html", "table". If
86// the stack was:
87// ["html", "body", "font", "table", "b", "i", "u"]
88// then popUntil(tableScope, "font") would return false, but
89// popUntil(tableScope, "i") would return true and the stack would become:
90// ["html", "body", "font", "table", "b"]
91//
92// If an element's tag is in both the stop tags and matchTags, then the stack
93// will be popped and the function returns true (provided, of course, there was
94// no higher element in the stack that was also in the stop tags). For example,
95// popUntil(tableScope, "table") returns true and leaves:
96// ["html", "body", "font"]
97func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
98 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
99 p.oe = p.oe[:i]
100 return true
101 }
102 return false
103}
104
105// indexOfElementInScope returns the index in p.oe of the highest element whose
106// tag is in matchTags that is in scope. If no matching element is in scope, it
107// returns -1.
108func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
109 for i := len(p.oe) - 1; i >= 0; i-- {
110 tagAtom := p.oe[i].DataAtom
111 if p.oe[i].Namespace == "" {
112 for _, t := range matchTags {
113 if t == tagAtom {
114 return i
115 }
116 }
117 switch s {
118 case defaultScope:
119 // No-op.
120 case listItemScope:
121 if tagAtom == a.Ol || tagAtom == a.Ul {
122 return -1
123 }
124 case buttonScope:
125 if tagAtom == a.Button {
126 return -1
127 }
128 case tableScope:
129 if tagAtom == a.Html || tagAtom == a.Table {
130 return -1
131 }
132 case selectScope:
133 if tagAtom != a.Optgroup && tagAtom != a.Option {
134 return -1
135 }
136 default:
137 panic("unreachable")
138 }
139 }
140 switch s {
141 case defaultScope, listItemScope, buttonScope:
142 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
143 if t == tagAtom {
144 return -1
145 }
146 }
147 }
148 }
149 return -1
150}
151
152// elementInScope is like popUntil, except that it doesn't modify the stack of
153// open elements.
154func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
155 return p.indexOfElementInScope(s, matchTags...) != -1
156}
157
158// clearStackToContext pops elements off the stack of open elements until a
159// scope-defined element is found.
160func (p *parser) clearStackToContext(s scope) {
161 for i := len(p.oe) - 1; i >= 0; i-- {
162 tagAtom := p.oe[i].DataAtom
163 switch s {
164 case tableScope:
165 if tagAtom == a.Html || tagAtom == a.Table {
166 p.oe = p.oe[:i+1]
167 return
168 }
169 case tableRowScope:
170 if tagAtom == a.Html || tagAtom == a.Tr {
171 p.oe = p.oe[:i+1]
172 return
173 }
174 case tableBodyScope:
175 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
176 p.oe = p.oe[:i+1]
177 return
178 }
179 default:
180 panic("unreachable")
181 }
182 }
183}
184
185// generateImpliedEndTags pops nodes off the stack of open elements as long as
186// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
187// If exceptions are specified, nodes with that name will not be popped off.
188func (p *parser) generateImpliedEndTags(exceptions ...string) {
189 var i int
190loop:
191 for i = len(p.oe) - 1; i >= 0; i-- {
192 n := p.oe[i]
193 if n.Type == ElementNode {
194 switch n.DataAtom {
195 case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
196 for _, except := range exceptions {
197 if n.Data == except {
198 break loop
199 }
200 }
201 continue
202 }
203 }
204 break
205 }
206
207 p.oe = p.oe[:i+1]
208}
209
210// addChild adds a child node n to the top element, and pushes n onto the stack
211// of open elements if it is an element node.
212func (p *parser) addChild(n *Node) {
213 if p.shouldFosterParent() {
214 p.fosterParent(n)
215 } else {
216 p.top().AppendChild(n)
217 }
218
219 if n.Type == ElementNode {
220 p.oe = append(p.oe, n)
221 }
222}
223
224// shouldFosterParent returns whether the next node to be added should be
225// foster parented.
226func (p *parser) shouldFosterParent() bool {
227 if p.fosterParenting {
228 switch p.top().DataAtom {
229 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
230 return true
231 }
232 }
233 return false
234}
235
236// fosterParent adds a child node according to the foster parenting rules.
237// Section 12.2.5.3, "foster parenting".
238func (p *parser) fosterParent(n *Node) {
239 var table, parent, prev *Node
240 var i int
241 for i = len(p.oe) - 1; i >= 0; i-- {
242 if p.oe[i].DataAtom == a.Table {
243 table = p.oe[i]
244 break
245 }
246 }
247
248 if table == nil {
249 // The foster parent is the html element.
250 parent = p.oe[0]
251 } else {
252 parent = table.Parent
253 }
254 if parent == nil {
255 parent = p.oe[i-1]
256 }
257
258 if table != nil {
259 prev = table.PrevSibling
260 } else {
261 prev = parent.LastChild
262 }
263 if prev != nil && prev.Type == TextNode && n.Type == TextNode {
264 prev.Data += n.Data
265 return
266 }
267
268 parent.InsertBefore(n, table)
269}
270
271// addText adds text to the preceding node if it is a text node, or else it
272// calls addChild with a new text node.
273func (p *parser) addText(text string) {
274 if text == "" {
275 return
276 }
277
278 if p.shouldFosterParent() {
279 p.fosterParent(&Node{
280 Type: TextNode,
281 Data: text,
282 })
283 return
284 }
285
286 t := p.top()
287 if n := t.LastChild; n != nil && n.Type == TextNode {
288 n.Data += text
289 return
290 }
291 p.addChild(&Node{
292 Type: TextNode,
293 Data: text,
294 })
295}
296
297// addElement adds a child element based on the current token.
298func (p *parser) addElement() {
299 p.addChild(&Node{
300 Type: ElementNode,
301 DataAtom: p.tok.DataAtom,
302 Data: p.tok.Data,
303 Attr: p.tok.Attr,
304 })
305}
306
307// Section 12.2.3.3.
308func (p *parser) addFormattingElement() {
309 tagAtom, attr := p.tok.DataAtom, p.tok.Attr
310 p.addElement()
311
312 // Implement the Noah's Ark clause, but with three per family instead of two.
313 identicalElements := 0
314findIdenticalElements:
315 for i := len(p.afe) - 1; i >= 0; i-- {
316 n := p.afe[i]
317 if n.Type == scopeMarkerNode {
318 break
319 }
320 if n.Type != ElementNode {
321 continue
322 }
323 if n.Namespace != "" {
324 continue
325 }
326 if n.DataAtom != tagAtom {
327 continue
328 }
329 if len(n.Attr) != len(attr) {
330 continue
331 }
332 compareAttributes:
333 for _, t0 := range n.Attr {
334 for _, t1 := range attr {
335 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
336 // Found a match for this attribute, continue with the next attribute.
337 continue compareAttributes
338 }
339 }
340 // If we get here, there is no attribute that matches a.
341 // Therefore the element is not identical to the new one.
342 continue findIdenticalElements
343 }
344
345 identicalElements++
346 if identicalElements >= 3 {
347 p.afe.remove(n)
348 }
349 }
350
351 p.afe = append(p.afe, p.top())
352}
353
354// Section 12.2.3.3.
355func (p *parser) clearActiveFormattingElements() {
356 for {
357 n := p.afe.pop()
358 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
359 return
360 }
361 }
362}
363
364// Section 12.2.3.3.
365func (p *parser) reconstructActiveFormattingElements() {
366 n := p.afe.top()
367 if n == nil {
368 return
369 }
370 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
371 return
372 }
373 i := len(p.afe) - 1
374 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
375 if i == 0 {
376 i = -1
377 break
378 }
379 i--
380 n = p.afe[i]
381 }
382 for {
383 i++
384 clone := p.afe[i].clone()
385 p.addChild(clone)
386 p.afe[i] = clone
387 if i == len(p.afe)-1 {
388 break
389 }
390 }
391}
392
393// Section 12.2.4.
394func (p *parser) acknowledgeSelfClosingTag() {
395 p.hasSelfClosingToken = false
396}
397
398// An insertion mode (section 12.2.3.1) is the state transition function from
399// a particular state in the HTML5 parser's state machine. It updates the
400// parser's fields depending on parser.tok (where ErrorToken means EOF).
401// It returns whether the token was consumed.
402type insertionMode func(*parser) bool
403
404// setOriginalIM sets the insertion mode to return to after completing a text or
405// inTableText insertion mode.
406// Section 12.2.3.1, "using the rules for".
407func (p *parser) setOriginalIM() {
408 if p.originalIM != nil {
409 panic("html: bad parser state: originalIM was set twice")
410 }
411 p.originalIM = p.im
412}
413
414// Section 12.2.3.1, "reset the insertion mode".
415func (p *parser) resetInsertionMode() {
416 for i := len(p.oe) - 1; i >= 0; i-- {
417 n := p.oe[i]
418 if i == 0 && p.context != nil {
419 n = p.context
420 }
421
422 switch n.DataAtom {
423 case a.Select:
424 p.im = inSelectIM
425 case a.Td, a.Th:
426 p.im = inCellIM
427 case a.Tr:
428 p.im = inRowIM
429 case a.Tbody, a.Thead, a.Tfoot:
430 p.im = inTableBodyIM
431 case a.Caption:
432 p.im = inCaptionIM
433 case a.Colgroup:
434 p.im = inColumnGroupIM
435 case a.Table:
436 p.im = inTableIM
437 case a.Head:
438 p.im = inBodyIM
439 case a.Body:
440 p.im = inBodyIM
441 case a.Frameset:
442 p.im = inFramesetIM
443 case a.Html:
444 p.im = beforeHeadIM
445 default:
446 continue
447 }
448 return
449 }
450 p.im = inBodyIM
451}
452
// whitespace is the cutset of characters the insertion modes trim as
// whitespace: space, tab, carriage return, line feed and form feed.
const whitespace = " \t\r\n\f"
454
455// Section 12.2.5.4.1.
456func initialIM(p *parser) bool {
457 switch p.tok.Type {
458 case TextToken:
459 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
460 if len(p.tok.Data) == 0 {
461 // It was all whitespace, so ignore it.
462 return true
463 }
464 case CommentToken:
465 p.doc.AppendChild(&Node{
466 Type: CommentNode,
467 Data: p.tok.Data,
468 })
469 return true
470 case DoctypeToken:
471 n, quirks := parseDoctype(p.tok.Data)
472 p.doc.AppendChild(n)
473 p.quirks = quirks
474 p.im = beforeHTMLIM
475 return true
476 }
477 p.quirks = true
478 p.im = beforeHTMLIM
479 return false
480}
481
482// Section 12.2.5.4.2.
483func beforeHTMLIM(p *parser) bool {
484 switch p.tok.Type {
485 case DoctypeToken:
486 // Ignore the token.
487 return true
488 case TextToken:
489 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
490 if len(p.tok.Data) == 0 {
491 // It was all whitespace, so ignore it.
492 return true
493 }
494 case StartTagToken:
495 if p.tok.DataAtom == a.Html {
496 p.addElement()
497 p.im = beforeHeadIM
498 return true
499 }
500 case EndTagToken:
501 switch p.tok.DataAtom {
502 case a.Head, a.Body, a.Html, a.Br:
503 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
504 return false
505 default:
506 // Ignore the token.
507 return true
508 }
509 case CommentToken:
510 p.doc.AppendChild(&Node{
511 Type: CommentNode,
512 Data: p.tok.Data,
513 })
514 return true
515 }
516 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
517 return false
518}
519
520// Section 12.2.5.4.3.
521func beforeHeadIM(p *parser) bool {
522 switch p.tok.Type {
523 case TextToken:
524 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
525 if len(p.tok.Data) == 0 {
526 // It was all whitespace, so ignore it.
527 return true
528 }
529 case StartTagToken:
530 switch p.tok.DataAtom {
531 case a.Head:
532 p.addElement()
533 p.head = p.top()
534 p.im = inHeadIM
535 return true
536 case a.Html:
537 return inBodyIM(p)
538 }
539 case EndTagToken:
540 switch p.tok.DataAtom {
541 case a.Head, a.Body, a.Html, a.Br:
542 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
543 return false
544 default:
545 // Ignore the token.
546 return true
547 }
548 case CommentToken:
549 p.addChild(&Node{
550 Type: CommentNode,
551 Data: p.tok.Data,
552 })
553 return true
554 case DoctypeToken:
555 // Ignore the token.
556 return true
557 }
558
559 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
560 return false
561}
562
563// Section 12.2.5.4.4.
564func inHeadIM(p *parser) bool {
565 switch p.tok.Type {
566 case TextToken:
567 s := strings.TrimLeft(p.tok.Data, whitespace)
568 if len(s) < len(p.tok.Data) {
569 // Add the initial whitespace to the current node.
570 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
571 if s == "" {
572 return true
573 }
574 p.tok.Data = s
575 }
576 case StartTagToken:
577 switch p.tok.DataAtom {
578 case a.Html:
579 return inBodyIM(p)
580 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
581 p.addElement()
582 p.oe.pop()
583 p.acknowledgeSelfClosingTag()
584 return true
585 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
586 p.addElement()
587 p.setOriginalIM()
588 p.im = textIM
589 return true
590 case a.Head:
591 // Ignore the token.
592 return true
593 }
594 case EndTagToken:
595 switch p.tok.DataAtom {
596 case a.Head:
597 n := p.oe.pop()
598 if n.DataAtom != a.Head {
599 panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
600 }
601 p.im = afterHeadIM
602 return true
603 case a.Body, a.Html, a.Br:
604 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
605 return false
606 default:
607 // Ignore the token.
608 return true
609 }
610 case CommentToken:
611 p.addChild(&Node{
612 Type: CommentNode,
613 Data: p.tok.Data,
614 })
615 return true
616 case DoctypeToken:
617 // Ignore the token.
618 return true
619 }
620
621 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
622 return false
623}
624
625// Section 12.2.5.4.6.
626func afterHeadIM(p *parser) bool {
627 switch p.tok.Type {
628 case TextToken:
629 s := strings.TrimLeft(p.tok.Data, whitespace)
630 if len(s) < len(p.tok.Data) {
631 // Add the initial whitespace to the current node.
632 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
633 if s == "" {
634 return true
635 }
636 p.tok.Data = s
637 }
638 case StartTagToken:
639 switch p.tok.DataAtom {
640 case a.Html:
641 return inBodyIM(p)
642 case a.Body:
643 p.addElement()
644 p.framesetOK = false
645 p.im = inBodyIM
646 return true
647 case a.Frameset:
648 p.addElement()
649 p.im = inFramesetIM
650 return true
651 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
652 p.oe = append(p.oe, p.head)
653 defer p.oe.remove(p.head)
654 return inHeadIM(p)
655 case a.Head:
656 // Ignore the token.
657 return true
658 }
659 case EndTagToken:
660 switch p.tok.DataAtom {
661 case a.Body, a.Html, a.Br:
662 // Drop down to creating an implied <body> tag.
663 default:
664 // Ignore the token.
665 return true
666 }
667 case CommentToken:
668 p.addChild(&Node{
669 Type: CommentNode,
670 Data: p.tok.Data,
671 })
672 return true
673 case DoctypeToken:
674 // Ignore the token.
675 return true
676 }
677
678 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
679 p.framesetOK = true
680 return false
681}
682
683// copyAttributes copies attributes of src not found on dst to dst.
684func copyAttributes(dst *Node, src Token) {
685 if len(src.Attr) == 0 {
686 return
687 }
688 attr := map[string]string{}
689 for _, t := range dst.Attr {
690 attr[t.Key] = t.Val
691 }
692 for _, t := range src.Attr {
693 if _, ok := attr[t.Key]; !ok {
694 dst.Attr = append(dst.Attr, t)
695 attr[t.Key] = t.Val
696 }
697 }
698}
699
// inBodyIM is the "in body" insertion mode. It dispatches on the type of the
// current token (text, start tag, end tag, comment) and, for tags, on the tag
// atom. It returns true when the token has been consumed and false when
// p.tok, possibly rewritten, has to be processed again.
700// Section 12.2.5.4.7.
701func inBodyIM(p *parser) bool {
702 switch p.tok.Type {
703 case TextToken:
704 d := p.tok.Data
705 switch n := p.oe.top(); n.DataAtom {
706 case a.Pre, a.Listing:
707 if n.FirstChild == nil {
708 // Ignore a newline at the start of a <pre> block.
709 if d != "" && d[0] == '\r' {
710 d = d[1:]
711 }
712 if d != "" && d[0] == '\n' {
713 d = d[1:]
714 }
715 }
716 }
// NUL bytes are dropped from the text before it is inserted.
717 d = strings.Replace(d, "\x00", "", -1)
718 if d == "" {
719 return true
720 }
721 p.reconstructActiveFormattingElements()
722 p.addText(d)
723 if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
724 // There were non-whitespace characters inserted.
725 p.framesetOK = false
726 }
727 case StartTagToken:
728 switch p.tok.DataAtom {
729 case a.Html:
// Merge attributes the bottom-most open element does not have yet.
730 copyAttributes(p.oe[0], p.tok)
731 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
732 return inHeadIM(p)
733 case a.Body:
// A repeated <body> only contributes new attributes to the existing body,
// which is expected as the second entry of the stack of open elements.
734 if len(p.oe) >= 2 {
735 body := p.oe[1]
736 if body.Type == ElementNode && body.DataAtom == a.Body {
737 p.framesetOK = false
738 copyAttributes(body, p.tok)
739 }
740 }
741 case a.Frameset:
742 if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
743 // Ignore the token.
744 return true
745 }
// Replace the body with the frameset: detach the body from the tree, cut
// the stack back to its first entry, then add the frameset element.
746 body := p.oe[1]
747 if body.Parent != nil {
748 body.Parent.RemoveChild(body)
749 }
750 p.oe = p.oe[:1]
751 p.addElement()
752 p.im = inFramesetIM
753 return true
754 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
755 p.popUntil(buttonScope, a.P)
756 p.addElement()
757 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
758 p.popUntil(buttonScope, a.P)
// A heading directly inside another heading closes the outer one first.
759 switch n := p.top(); n.DataAtom {
760 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
761 p.oe.pop()
762 }
763 p.addElement()
764 case a.Pre, a.Listing:
765 p.popUntil(buttonScope, a.P)
766 p.addElement()
767 // The newline, if any, will be dealt with by the TextToken case.
768 p.framesetOK = false
769 case a.Form:
// A <form> start tag is dropped while a form element pointer is set.
770 if p.form == nil {
771 p.popUntil(buttonScope, a.P)
772 p.addElement()
773 p.form = p.top()
774 }
775 case a.Li:
776 p.framesetOK = false
// Walk down the stack: an open li is closed (together with everything above
// it); the walk stops at the first special element other than address, div
// or p.
777 for i := len(p.oe) - 1; i >= 0; i-- {
778 node := p.oe[i]
779 switch node.DataAtom {
780 case a.Li:
781 p.oe = p.oe[:i]
782 case a.Address, a.Div, a.P:
783 continue
784 default:
785 if !isSpecialElement(node) {
786 continue
787 }
788 }
789 break
790 }
791 p.popUntil(buttonScope, a.P)
792 p.addElement()
793 case a.Dd, a.Dt:
794 p.framesetOK = false
// Same walk as for li, closing an open dd or dt instead.
795 for i := len(p.oe) - 1; i >= 0; i-- {
796 node := p.oe[i]
797 switch node.DataAtom {
798 case a.Dd, a.Dt:
799 p.oe = p.oe[:i]
800 case a.Address, a.Div, a.P:
801 continue
802 default:
803 if !isSpecialElement(node) {
804 continue
805 }
806 }
807 break
808 }
809 p.popUntil(buttonScope, a.P)
810 p.addElement()
811 case a.Plaintext:
812 p.popUntil(buttonScope, a.P)
813 p.addElement()
814 case a.Button:
815 p.popUntil(defaultScope, a.Button)
816 p.reconstructActiveFormattingElements()
817 p.addElement()
818 p.framesetOK = false
819 case a.A:
// An <a> still active since the last scope marker is closed through the
// end-tag formatting algorithm and dropped from both stacks before the new
// one is added.
820 for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
821 if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
822 p.inBodyEndTagFormatting(a.A)
823 p.oe.remove(n)
824 p.afe.remove(n)
825 break
826 }
827 }
828 p.reconstructActiveFormattingElements()
829 p.addFormattingElement()
830 case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
831 p.reconstructActiveFormattingElements()
832 p.addFormattingElement()
833 case a.Nobr:
834 p.reconstructActiveFormattingElements()
835 if p.elementInScope(defaultScope, a.Nobr) {
836 p.inBodyEndTagFormatting(a.Nobr)
837 p.reconstructActiveFormattingElements()
838 }
839 p.addFormattingElement()
840 case a.Applet, a.Marquee, a.Object:
841 p.reconstructActiveFormattingElements()
842 p.addElement()
// Push a scope marker onto the list of active formatting elements.
843 p.afe = append(p.afe, &scopeMarker)
844 p.framesetOK = false
845 case a.Table:
846 if !p.quirks {
847 p.popUntil(buttonScope, a.P)
848 }
849 p.addElement()
850 p.framesetOK = false
851 p.im = inTableIM
852 return true
853 case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
// These elements are added and popped right away.
854 p.reconstructActiveFormattingElements()
855 p.addElement()
856 p.oe.pop()
857 p.acknowledgeSelfClosingTag()
858 if p.tok.DataAtom == a.Input {
859 for _, t := range p.tok.Attr {
860 if t.Key == "type" {
861 if strings.ToLower(t.Val) == "hidden" {
862 // Skip setting framesetOK = false
863 return true
864 }
865 }
866 }
867 }
868 p.framesetOK = false
869 case a.Param, a.Source, a.Track:
870 p.addElement()
871 p.oe.pop()
872 p.acknowledgeSelfClosingTag()
873 case a.Hr:
874 p.popUntil(buttonScope, a.P)
875 p.addElement()
876 p.oe.pop()
877 p.acknowledgeSelfClosingTag()
878 p.framesetOK = false
879 case a.Image:
// <image> is handled as <img>: rewrite the token and have it reprocessed.
880 p.tok.DataAtom = a.Img
881 p.tok.Data = a.Img.String()
882 return false
883 case a.Isindex:
884 if p.form != nil {
885 // Ignore the token.
886 return true
887 }
// <isindex> is expanded into a form holding hr, a label with the prompt
// text and an input named "isindex", then another hr. The token's action
// and prompt attributes feed the form and the label; its name attribute is
// dropped and all other attributes go to the input.
888 action := ""
889 prompt := "This is a searchable index. Enter search keywords: "
890 attr := []Attribute{{Key: "name", Val: "isindex"}}
891 for _, t := range p.tok.Attr {
892 switch t.Key {
893 case "action":
894 action = t.Val
895 case "name":
896 // Ignore the attribute.
897 case "prompt":
898 prompt = t.Val
899 default:
900 attr = append(attr, t)
901 }
902 }
903 p.acknowledgeSelfClosingTag()
904 p.popUntil(buttonScope, a.P)
905 p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
906 if action != "" {
907 p.form.Attr = []Attribute{{Key: "action", Val: action}}
908 }
909 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
910 p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
911 p.addText(prompt)
912 p.addChild(&Node{
913 Type: ElementNode,
914 DataAtom: a.Input,
915 Data: a.Input.String(),
916 Attr: attr,
917 })
918 p.oe.pop()
919 p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
920 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
921 p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
922 case a.Textarea:
// The element's content is handled by textIM; the current mode is saved
// first so it can be restored afterwards.
923 p.addElement()
924 p.setOriginalIM()
925 p.framesetOK = false
926 p.im = textIM
927 case a.Xmp:
928 p.popUntil(buttonScope, a.P)
929 p.reconstructActiveFormattingElements()
930 p.framesetOK = false
931 p.addElement()
932 p.setOriginalIM()
933 p.im = textIM
934 case a.Iframe:
935 p.framesetOK = false
936 p.addElement()
937 p.setOriginalIM()
938 p.im = textIM
939 case a.Noembed, a.Noscript:
940 p.addElement()
941 p.setOriginalIM()
942 p.im = textIM
943 case a.Select:
944 p.reconstructActiveFormattingElements()
945 p.addElement()
946 p.framesetOK = false
947 p.im = inSelectIM
948 return true
949 case a.Optgroup, a.Option:
950 if p.top().DataAtom == a.Option {
951 p.oe.pop()
952 }
953 p.reconstructActiveFormattingElements()
954 p.addElement()
955 case a.Rp, a.Rt:
956 if p.elementInScope(defaultScope, a.Ruby) {
957 p.generateImpliedEndTags()
958 }
959 p.addElement()
960 case a.Math, a.Svg:
// Foreign content: adjust attribute names for the target vocabulary and
// record the tag name as the new element's namespace.
961 p.reconstructActiveFormattingElements()
962 if p.tok.DataAtom == a.Math {
963 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
964 } else {
965 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
966 }
967 adjustForeignAttributes(p.tok.Attr)
968 p.addElement()
969 p.top().Namespace = p.tok.Data
970 if p.hasSelfClosingToken {
971 p.oe.pop()
972 p.acknowledgeSelfClosingTag()
973 }
974 return true
975 case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
976 // Ignore the token.
977 default:
978 p.reconstructActiveFormattingElements()
979 p.addElement()
980 }
981 case EndTagToken:
982 switch p.tok.DataAtom {
983 case a.Body:
984 if p.elementInScope(defaultScope, a.Body) {
985 p.im = afterBodyIM
986 }
987 case a.Html:
// With a body in scope, </html> first implies </body> and is then
// reprocessed; otherwise it is ignored.
988 if p.elementInScope(defaultScope, a.Body) {
989 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
990 return false
991 }
992 return true
993 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
994 p.popUntil(defaultScope, p.tok.DataAtom)
995 case a.Form:
// The form element pointer is cleared in every case; the element is removed
// from the stack only when it is the form found in scope.
996 node := p.form
997 p.form = nil
998 i := p.indexOfElementInScope(defaultScope, a.Form)
999 if node == nil || i == -1 || p.oe[i] != node {
1000 // Ignore the token.
1001 return true
1002 }
1003 p.generateImpliedEndTags()
1004 p.oe.remove(node)
1005 case a.P:
// A </p> with no p in button scope first opens an implied <p>.
1006 if !p.elementInScope(buttonScope, a.P) {
1007 p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1008 }
1009 p.popUntil(buttonScope, a.P)
1010 case a.Li:
1011 p.popUntil(listItemScope, a.Li)
1012 case a.Dd, a.Dt:
1013 p.popUntil(defaultScope, p.tok.DataAtom)
1014 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
// Any heading end tag closes the nearest open heading of any level.
1015 p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1016 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1017 p.inBodyEndTagFormatting(p.tok.DataAtom)
1018 case a.Applet, a.Marquee, a.Object:
1019 if p.popUntil(defaultScope, p.tok.DataAtom) {
1020 p.clearActiveFormattingElements()
1021 }
1022 case a.Br:
// </br> is handled as <br>: flip the token type and have it reprocessed.
1023 p.tok.Type = StartTagToken
1024 return false
1025 default:
1026 p.inBodyEndTagOther(p.tok.DataAtom)
1027 }
1028 case CommentToken:
1029 p.addChild(&Node{
1030 Type: CommentNode,
1031 Data: p.tok.Data,
1032 })
1033 }
1034
1035 return true
1036}
1037
// inBodyEndTagFormatting handles an end tag for a formatting element whose
// atom is tagAtom. It repairs misnested formatting markup by cloning
// formatting elements and reparenting nodes, updating both the stack of open
// elements (p.oe) and the list of active formatting elements (p.afe).
1038func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
1039 // This is the "adoption agency" algorithm, described at
1040 // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
1041
1042 // TODO: this is a fairly literal line-by-line translation of that algorithm.
1043 // Once the code successfully parses the comprehensive test suite, we should
1044 // refactor this code to be more idiomatic.
1045
1046 // Steps 1-4. The outer loop.
// The outer loop runs at most eight times.
1047 for i := 0; i < 8; i++ {
1048 // Step 5. Find the formatting element.
// The search runs from the end of p.afe and does not cross a scope marker.
1049 var formattingElement *Node
1050 for j := len(p.afe) - 1; j >= 0; j-- {
1051 if p.afe[j].Type == scopeMarkerNode {
1052 break
1053 }
1054 if p.afe[j].DataAtom == tagAtom {
1055 formattingElement = p.afe[j]
1056 break
1057 }
1058 }
1059 if formattingElement == nil {
// No active formatting element with this tag: treat as any other end tag.
1060 p.inBodyEndTagOther(tagAtom)
1061 return
1062 }
1063 feIndex := p.oe.index(formattingElement)
1064 if feIndex == -1 {
// The element is active but no longer open: just forget it.
1065 p.afe.remove(formattingElement)
1066 return
1067 }
1068 if !p.elementInScope(defaultScope, tagAtom) {
1069 // Ignore the tag.
1070 return
1071 }
1072
1073 // Steps 9-10. Find the furthest block.
// This is the first special element at or above the formatting element on
// the stack of open elements.
1074 var furthestBlock *Node
1075 for _, e := range p.oe[feIndex:] {
1076 if isSpecialElement(e) {
1077 furthestBlock = e
1078 break
1079 }
1080 }
1081 if furthestBlock == nil {
// Without a furthest block, pop up to and including the formatting element
// and drop it from the active list.
1082 e := p.oe.pop()
1083 for e != formattingElement {
1084 e = p.oe.pop()
1085 }
1086 p.afe.remove(e)
1087 return
1088 }
1089
1090 // Steps 11-12. Find the common ancestor and bookmark node.
1091 commonAncestor := p.oe[feIndex-1]
1092 bookmark := p.afe.index(formattingElement)
1093
1094 // Step 13. The inner loop. Find the lastNode to reparent.
1095 lastNode := furthestBlock
1096 node := furthestBlock
1097 x := p.oe.index(node)
1098 // Steps 13.1-13.2
// The inner loop runs at most three times.
1099 for j := 0; j < 3; j++ {
1100 // Step 13.3.
1101 x--
1102 node = p.oe[x]
1103 // Step 13.4 - 13.5.
// Nodes that are not active formatting elements are removed from the stack.
1104 if p.afe.index(node) == -1 {
1105 p.oe.remove(node)
1106 continue
1107 }
1108 // Step 13.6.
1109 if node == formattingElement {
1110 break
1111 }
1112 // Step 13.7.
// Replace node by a clone in both p.afe and p.oe.
1113 clone := node.clone()
1114 p.afe[p.afe.index(node)] = clone
1115 p.oe[p.oe.index(node)] = clone
1116 node = clone
1117 // Step 13.8.
1118 if lastNode == furthestBlock {
1119 bookmark = p.afe.index(node) + 1
1120 }
1121 // Step 13.9.
1122 if lastNode.Parent != nil {
1123 lastNode.Parent.RemoveChild(lastNode)
1124 }
1125 node.AppendChild(lastNode)
1126 // Step 13.10.
1127 lastNode = node
1128 }
1129
1130 // Step 14. Reparent lastNode to the common ancestor,
1131 // or for misnested table nodes, to the foster parent.
1132 if lastNode.Parent != nil {
1133 lastNode.Parent.RemoveChild(lastNode)
1134 }
1135 switch commonAncestor.DataAtom {
1136 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1137 p.fosterParent(lastNode)
1138 default:
1139 commonAncestor.AppendChild(lastNode)
1140 }
1141
1142 // Steps 15-17. Reparent nodes from the furthest block's children
1143 // to a clone of the formatting element.
1144 clone := formattingElement.clone()
1145 reparentChildren(clone, furthestBlock)
1146 furthestBlock.AppendChild(clone)
1147
1148 // Step 18. Fix up the list of active formatting elements.
1149 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
1150 // Move the bookmark with the rest of the list.
1151 bookmark--
1152 }
1153 p.afe.remove(formattingElement)
1154 p.afe.insert(bookmark, clone)
1155
1156 // Step 19. Fix up the stack of open elements.
// The clone goes directly above the furthest block.
1157 p.oe.remove(formattingElement)
1158 p.oe.insert(p.oe.index(furthestBlock)+1, clone)
1159 }
1160}
1161
1162// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1163// "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
1164// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1165func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1166 for i := len(p.oe) - 1; i >= 0; i-- {
1167 if p.oe[i].DataAtom == tagAtom {
1168 p.oe = p.oe[:i]
1169 break
1170 }
1171 if isSpecialElement(p.oe[i]) {
1172 break
1173 }
1174 }
1175}
1176
1177// Section 12.2.5.4.8.
1178func textIM(p *parser) bool {
1179 switch p.tok.Type {
1180 case ErrorToken:
1181 p.oe.pop()
1182 case TextToken:
1183 d := p.tok.Data
1184 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1185 // Ignore a newline at the start of a <textarea> block.
1186 if d != "" && d[0] == '\r' {
1187 d = d[1:]
1188 }
1189 if d != "" && d[0] == '\n' {
1190 d = d[1:]
1191 }
1192 }
1193 if d == "" {
1194 return true
1195 }
1196 p.addText(d)
1197 return true
1198 case EndTagToken:
1199 p.oe.pop()
1200 }
1201 p.im = p.originalIM
1202 p.originalIM = nil
1203 return p.tok.Type == EndTagToken
1204}
1205
1206// Section 12.2.5.4.9.
1207func inTableIM(p *parser) bool {
1208 switch p.tok.Type {
1209 case ErrorToken:
1210 // Stop parsing.
1211 return true
1212 case TextToken:
1213 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1214 switch p.oe.top().DataAtom {
1215 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1216 if strings.Trim(p.tok.Data, whitespace) == "" {
1217 p.addText(p.tok.Data)
1218 return true
1219 }
1220 }
1221 case StartTagToken:
1222 switch p.tok.DataAtom {
1223 case a.Caption:
1224 p.clearStackToContext(tableScope)
1225 p.afe = append(p.afe, &scopeMarker)
1226 p.addElement()
1227 p.im = inCaptionIM
1228 return true
1229 case a.Colgroup:
1230 p.clearStackToContext(tableScope)
1231 p.addElement()
1232 p.im = inColumnGroupIM
1233 return true
1234 case a.Col:
1235 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1236 return false
1237 case a.Tbody, a.Tfoot, a.Thead:
1238 p.clearStackToContext(tableScope)
1239 p.addElement()
1240 p.im = inTableBodyIM
1241 return true
1242 case a.Td, a.Th, a.Tr:
1243 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1244 return false
1245 case a.Table:
1246 if p.popUntil(tableScope, a.Table) {
1247 p.resetInsertionMode()
1248 return false
1249 }
1250 // Ignore the token.
1251 return true
1252 case a.Style, a.Script:
1253 return inHeadIM(p)
1254 case a.Input:
1255 for _, t := range p.tok.Attr {
1256 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1257 p.addElement()
1258 p.oe.pop()
1259 return true
1260 }
1261 }
1262 // Otherwise drop down to the default action.
1263 case a.Form:
1264 if p.form != nil {
1265 // Ignore the token.
1266 return true
1267 }
1268 p.addElement()
1269 p.form = p.oe.pop()
1270 case a.Select:
1271 p.reconstructActiveFormattingElements()
1272 switch p.top().DataAtom {
1273 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1274 p.fosterParenting = true
1275 }
1276 p.addElement()
1277 p.fosterParenting = false
1278 p.framesetOK = false
1279 p.im = inSelectInTableIM
1280 return true
1281 }
1282 case EndTagToken:
1283 switch p.tok.DataAtom {
1284 case a.Table:
1285 if p.popUntil(tableScope, a.Table) {
1286 p.resetInsertionMode()
1287 return true
1288 }
1289 // Ignore the token.
1290 return true
1291 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1292 // Ignore the token.
1293 return true
1294 }
1295 case CommentToken:
1296 p.addChild(&Node{
1297 Type: CommentNode,
1298 Data: p.tok.Data,
1299 })
1300 return true
1301 case DoctypeToken:
1302 // Ignore the token.
1303 return true
1304 }
1305
1306 p.fosterParenting = true
1307 defer func() { p.fosterParenting = false }()
1308
1309 return inBodyIM(p)
1310}
1311
1312// Section 12.2.5.4.11.
1313func inCaptionIM(p *parser) bool {
1314 switch p.tok.Type {
1315 case StartTagToken:
1316 switch p.tok.DataAtom {
1317 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1318 if p.popUntil(tableScope, a.Caption) {
1319 p.clearActiveFormattingElements()
1320 p.im = inTableIM
1321 return false
1322 } else {
1323 // Ignore the token.
1324 return true
1325 }
1326 case a.Select:
1327 p.reconstructActiveFormattingElements()
1328 p.addElement()
1329 p.framesetOK = false
1330 p.im = inSelectInTableIM
1331 return true
1332 }
1333 case EndTagToken:
1334 switch p.tok.DataAtom {
1335 case a.Caption:
1336 if p.popUntil(tableScope, a.Caption) {
1337 p.clearActiveFormattingElements()
1338 p.im = inTableIM
1339 }
1340 return true
1341 case a.Table:
1342 if p.popUntil(tableScope, a.Caption) {
1343 p.clearActiveFormattingElements()
1344 p.im = inTableIM
1345 return false
1346 } else {
1347 // Ignore the token.
1348 return true
1349 }
1350 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1351 // Ignore the token.
1352 return true
1353 }
1354 }
1355 return inBodyIM(p)
1356}
1357
1358// Section 12.2.5.4.12.
1359func inColumnGroupIM(p *parser) bool {
1360 switch p.tok.Type {
1361 case TextToken:
1362 s := strings.TrimLeft(p.tok.Data, whitespace)
1363 if len(s) < len(p.tok.Data) {
1364 // Add the initial whitespace to the current node.
1365 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1366 if s == "" {
1367 return true
1368 }
1369 p.tok.Data = s
1370 }
1371 case CommentToken:
1372 p.addChild(&Node{
1373 Type: CommentNode,
1374 Data: p.tok.Data,
1375 })
1376 return true
1377 case DoctypeToken:
1378 // Ignore the token.
1379 return true
1380 case StartTagToken:
1381 switch p.tok.DataAtom {
1382 case a.Html:
1383 return inBodyIM(p)
1384 case a.Col:
1385 p.addElement()
1386 p.oe.pop()
1387 p.acknowledgeSelfClosingTag()
1388 return true
1389 }
1390 case EndTagToken:
1391 switch p.tok.DataAtom {
1392 case a.Colgroup:
1393 if p.oe.top().DataAtom != a.Html {
1394 p.oe.pop()
1395 p.im = inTableIM
1396 }
1397 return true
1398 case a.Col:
1399 // Ignore the token.
1400 return true
1401 }
1402 }
1403 if p.oe.top().DataAtom != a.Html {
1404 p.oe.pop()
1405 p.im = inTableIM
1406 return false
1407 }
1408 return true
1409}
1410
1411// Section 12.2.5.4.13.
1412func inTableBodyIM(p *parser) bool {
1413 switch p.tok.Type {
1414 case StartTagToken:
1415 switch p.tok.DataAtom {
1416 case a.Tr:
1417 p.clearStackToContext(tableBodyScope)
1418 p.addElement()
1419 p.im = inRowIM
1420 return true
1421 case a.Td, a.Th:
1422 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1423 return false
1424 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1425 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1426 p.im = inTableIM
1427 return false
1428 }
1429 // Ignore the token.
1430 return true
1431 }
1432 case EndTagToken:
1433 switch p.tok.DataAtom {
1434 case a.Tbody, a.Tfoot, a.Thead:
1435 if p.elementInScope(tableScope, p.tok.DataAtom) {
1436 p.clearStackToContext(tableBodyScope)
1437 p.oe.pop()
1438 p.im = inTableIM
1439 }
1440 return true
1441 case a.Table:
1442 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1443 p.im = inTableIM
1444 return false
1445 }
1446 // Ignore the token.
1447 return true
1448 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1449 // Ignore the token.
1450 return true
1451 }
1452 case CommentToken:
1453 p.addChild(&Node{
1454 Type: CommentNode,
1455 Data: p.tok.Data,
1456 })
1457 return true
1458 }
1459
1460 return inTableIM(p)
1461}
1462
1463// Section 12.2.5.4.14.
1464func inRowIM(p *parser) bool {
1465 switch p.tok.Type {
1466 case StartTagToken:
1467 switch p.tok.DataAtom {
1468 case a.Td, a.Th:
1469 p.clearStackToContext(tableRowScope)
1470 p.addElement()
1471 p.afe = append(p.afe, &scopeMarker)
1472 p.im = inCellIM
1473 return true
1474 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1475 if p.popUntil(tableScope, a.Tr) {
1476 p.im = inTableBodyIM
1477 return false
1478 }
1479 // Ignore the token.
1480 return true
1481 }
1482 case EndTagToken:
1483 switch p.tok.DataAtom {
1484 case a.Tr:
1485 if p.popUntil(tableScope, a.Tr) {
1486 p.im = inTableBodyIM
1487 return true
1488 }
1489 // Ignore the token.
1490 return true
1491 case a.Table:
1492 if p.popUntil(tableScope, a.Tr) {
1493 p.im = inTableBodyIM
1494 return false
1495 }
1496 // Ignore the token.
1497 return true
1498 case a.Tbody, a.Tfoot, a.Thead:
1499 if p.elementInScope(tableScope, p.tok.DataAtom) {
1500 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1501 return false
1502 }
1503 // Ignore the token.
1504 return true
1505 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1506 // Ignore the token.
1507 return true
1508 }
1509 }
1510
1511 return inTableIM(p)
1512}
1513
1514// Section 12.2.5.4.15.
1515func inCellIM(p *parser) bool {
1516 switch p.tok.Type {
1517 case StartTagToken:
1518 switch p.tok.DataAtom {
1519 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1520 if p.popUntil(tableScope, a.Td, a.Th) {
1521 // Close the cell and reprocess.
1522 p.clearActiveFormattingElements()
1523 p.im = inRowIM
1524 return false
1525 }
1526 // Ignore the token.
1527 return true
1528 case a.Select:
1529 p.reconstructActiveFormattingElements()
1530 p.addElement()
1531 p.framesetOK = false
1532 p.im = inSelectInTableIM
1533 return true
1534 }
1535 case EndTagToken:
1536 switch p.tok.DataAtom {
1537 case a.Td, a.Th:
1538 if !p.popUntil(tableScope, p.tok.DataAtom) {
1539 // Ignore the token.
1540 return true
1541 }
1542 p.clearActiveFormattingElements()
1543 p.im = inRowIM
1544 return true
1545 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1546 // Ignore the token.
1547 return true
1548 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1549 if !p.elementInScope(tableScope, p.tok.DataAtom) {
1550 // Ignore the token.
1551 return true
1552 }
1553 // Close the cell and reprocess.
1554 p.popUntil(tableScope, a.Td, a.Th)
1555 p.clearActiveFormattingElements()
1556 p.im = inRowIM
1557 return false
1558 }
1559 }
1560 return inBodyIM(p)
1561}
1562
1563// Section 12.2.5.4.16.
1564func inSelectIM(p *parser) bool {
1565 switch p.tok.Type {
1566 case ErrorToken:
1567 // Stop parsing.
1568 return true
1569 case TextToken:
1570 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1571 case StartTagToken:
1572 switch p.tok.DataAtom {
1573 case a.Html:
1574 return inBodyIM(p)
1575 case a.Option:
1576 if p.top().DataAtom == a.Option {
1577 p.oe.pop()
1578 }
1579 p.addElement()
1580 case a.Optgroup:
1581 if p.top().DataAtom == a.Option {
1582 p.oe.pop()
1583 }
1584 if p.top().DataAtom == a.Optgroup {
1585 p.oe.pop()
1586 }
1587 p.addElement()
1588 case a.Select:
1589 p.tok.Type = EndTagToken
1590 return false
1591 case a.Input, a.Keygen, a.Textarea:
1592 if p.elementInScope(selectScope, a.Select) {
1593 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1594 return false
1595 }
1596 // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1597 p.tokenizer.NextIsNotRawText()
1598 // Ignore the token.
1599 return true
1600 case a.Script:
1601 return inHeadIM(p)
1602 }
1603 case EndTagToken:
1604 switch p.tok.DataAtom {
1605 case a.Option:
1606 if p.top().DataAtom == a.Option {
1607 p.oe.pop()
1608 }
1609 case a.Optgroup:
1610 i := len(p.oe) - 1
1611 if p.oe[i].DataAtom == a.Option {
1612 i--
1613 }
1614 if p.oe[i].DataAtom == a.Optgroup {
1615 p.oe = p.oe[:i]
1616 }
1617 case a.Select:
1618 if p.popUntil(selectScope, a.Select) {
1619 p.resetInsertionMode()
1620 }
1621 }
1622 case CommentToken:
1623 p.addChild(&Node{
1624 Type: CommentNode,
1625 Data: p.tok.Data,
1626 })
1627 case DoctypeToken:
1628 // Ignore the token.
1629 return true
1630 }
1631
1632 return true
1633}
1634
1635// Section 12.2.5.4.17.
1636func inSelectInTableIM(p *parser) bool {
1637 switch p.tok.Type {
1638 case StartTagToken, EndTagToken:
1639 switch p.tok.DataAtom {
1640 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1641 if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1642 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1643 return false
1644 } else {
1645 // Ignore the token.
1646 return true
1647 }
1648 }
1649 }
1650 return inSelectIM(p)
1651}
1652
1653// Section 12.2.5.4.18.
1654func afterBodyIM(p *parser) bool {
1655 switch p.tok.Type {
1656 case ErrorToken:
1657 // Stop parsing.
1658 return true
1659 case TextToken:
1660 s := strings.TrimLeft(p.tok.Data, whitespace)
1661 if len(s) == 0 {
1662 // It was all whitespace.
1663 return inBodyIM(p)
1664 }
1665 case StartTagToken:
1666 if p.tok.DataAtom == a.Html {
1667 return inBodyIM(p)
1668 }
1669 case EndTagToken:
1670 if p.tok.DataAtom == a.Html {
1671 if !p.fragment {
1672 p.im = afterAfterBodyIM
1673 }
1674 return true
1675 }
1676 case CommentToken:
1677 // The comment is attached to the <html> element.
1678 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1679 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1680 }
1681 p.oe[0].AppendChild(&Node{
1682 Type: CommentNode,
1683 Data: p.tok.Data,
1684 })
1685 return true
1686 }
1687 p.im = inBodyIM
1688 return false
1689}
1690
1691// Section 12.2.5.4.19.
1692func inFramesetIM(p *parser) bool {
1693 switch p.tok.Type {
1694 case CommentToken:
1695 p.addChild(&Node{
1696 Type: CommentNode,
1697 Data: p.tok.Data,
1698 })
1699 case TextToken:
1700 // Ignore all text but whitespace.
1701 s := strings.Map(func(c rune) rune {
1702 switch c {
1703 case ' ', '\t', '\n', '\f', '\r':
1704 return c
1705 }
1706 return -1
1707 }, p.tok.Data)
1708 if s != "" {
1709 p.addText(s)
1710 }
1711 case StartTagToken:
1712 switch p.tok.DataAtom {
1713 case a.Html:
1714 return inBodyIM(p)
1715 case a.Frameset:
1716 p.addElement()
1717 case a.Frame:
1718 p.addElement()
1719 p.oe.pop()
1720 p.acknowledgeSelfClosingTag()
1721 case a.Noframes:
1722 return inHeadIM(p)
1723 }
1724 case EndTagToken:
1725 switch p.tok.DataAtom {
1726 case a.Frameset:
1727 if p.oe.top().DataAtom != a.Html {
1728 p.oe.pop()
1729 if p.oe.top().DataAtom != a.Frameset {
1730 p.im = afterFramesetIM
1731 return true
1732 }
1733 }
1734 }
1735 default:
1736 // Ignore the token.
1737 }
1738 return true
1739}
1740
1741// Section 12.2.5.4.20.
1742func afterFramesetIM(p *parser) bool {
1743 switch p.tok.Type {
1744 case CommentToken:
1745 p.addChild(&Node{
1746 Type: CommentNode,
1747 Data: p.tok.Data,
1748 })
1749 case TextToken:
1750 // Ignore all text but whitespace.
1751 s := strings.Map(func(c rune) rune {
1752 switch c {
1753 case ' ', '\t', '\n', '\f', '\r':
1754 return c
1755 }
1756 return -1
1757 }, p.tok.Data)
1758 if s != "" {
1759 p.addText(s)
1760 }
1761 case StartTagToken:
1762 switch p.tok.DataAtom {
1763 case a.Html:
1764 return inBodyIM(p)
1765 case a.Noframes:
1766 return inHeadIM(p)
1767 }
1768 case EndTagToken:
1769 switch p.tok.DataAtom {
1770 case a.Html:
1771 p.im = afterAfterFramesetIM
1772 return true
1773 }
1774 default:
1775 // Ignore the token.
1776 }
1777 return true
1778}
1779
1780// Section 12.2.5.4.21.
1781func afterAfterBodyIM(p *parser) bool {
1782 switch p.tok.Type {
1783 case ErrorToken:
1784 // Stop parsing.
1785 return true
1786 case TextToken:
1787 s := strings.TrimLeft(p.tok.Data, whitespace)
1788 if len(s) == 0 {
1789 // It was all whitespace.
1790 return inBodyIM(p)
1791 }
1792 case StartTagToken:
1793 if p.tok.DataAtom == a.Html {
1794 return inBodyIM(p)
1795 }
1796 case CommentToken:
1797 p.doc.AppendChild(&Node{
1798 Type: CommentNode,
1799 Data: p.tok.Data,
1800 })
1801 return true
1802 case DoctypeToken:
1803 return inBodyIM(p)
1804 }
1805 p.im = inBodyIM
1806 return false
1807}
1808
1809// Section 12.2.5.4.22.
1810func afterAfterFramesetIM(p *parser) bool {
1811 switch p.tok.Type {
1812 case CommentToken:
1813 p.doc.AppendChild(&Node{
1814 Type: CommentNode,
1815 Data: p.tok.Data,
1816 })
1817 case TextToken:
1818 // Ignore all text but whitespace.
1819 s := strings.Map(func(c rune) rune {
1820 switch c {
1821 case ' ', '\t', '\n', '\f', '\r':
1822 return c
1823 }
1824 return -1
1825 }, p.tok.Data)
1826 if s != "" {
1827 p.tok.Data = s
1828 return inBodyIM(p)
1829 }
1830 case StartTagToken:
1831 switch p.tok.DataAtom {
1832 case a.Html:
1833 return inBodyIM(p)
1834 case a.Noframes:
1835 return inHeadIM(p)
1836 }
1837 case DoctypeToken:
1838 return inBodyIM(p)
1839 default:
1840 // Ignore the token.
1841 }
1842 return true
1843}
1844
1845const whitespaceOrNUL = whitespace + "\x00"
1846
1847// Section 12.2.5.5.
1848func parseForeignContent(p *parser) bool {
1849 switch p.tok.Type {
1850 case TextToken:
1851 if p.framesetOK {
1852 p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
1853 }
1854 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
1855 p.addText(p.tok.Data)
1856 case CommentToken:
1857 p.addChild(&Node{
1858 Type: CommentNode,
1859 Data: p.tok.Data,
1860 })
1861 case StartTagToken:
1862 b := breakout[p.tok.Data]
1863 if p.tok.DataAtom == a.Font {
1864 loop:
1865 for _, attr := range p.tok.Attr {
1866 switch attr.Key {
1867 case "color", "face", "size":
1868 b = true
1869 break loop
1870 }
1871 }
1872 }
1873 if b {
1874 for i := len(p.oe) - 1; i >= 0; i-- {
1875 n := p.oe[i]
1876 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
1877 p.oe = p.oe[:i+1]
1878 break
1879 }
1880 }
1881 return false
1882 }
1883 switch p.top().Namespace {
1884 case "math":
1885 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1886 case "svg":
1887 // Adjust SVG tag names. The tokenizer lower-cases tag names, but
1888 // SVG wants e.g. "foreignObject" with a capital second "O".
1889 if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
1890 p.tok.DataAtom = a.Lookup([]byte(x))
1891 p.tok.Data = x
1892 }
1893 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1894 default:
1895 panic("html: bad parser state: unexpected namespace")
1896 }
1897 adjustForeignAttributes(p.tok.Attr)
1898 namespace := p.top().Namespace
1899 p.addElement()
1900 p.top().Namespace = namespace
1901 if namespace != "" {
1902 // Don't let the tokenizer go into raw text mode in foreign content
1903 // (e.g. in an SVG <title> tag).
1904 p.tokenizer.NextIsNotRawText()
1905 }
1906 if p.hasSelfClosingToken {
1907 p.oe.pop()
1908 p.acknowledgeSelfClosingTag()
1909 }
1910 case EndTagToken:
1911 for i := len(p.oe) - 1; i >= 0; i-- {
1912 if p.oe[i].Namespace == "" {
1913 return p.im(p)
1914 }
1915 if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
1916 p.oe = p.oe[:i]
1917 break
1918 }
1919 }
1920 return true
1921 default:
1922 // Ignore the token.
1923 }
1924 return true
1925}
1926
1927// Section 12.2.5.
1928func (p *parser) inForeignContent() bool {
1929 if len(p.oe) == 0 {
1930 return false
1931 }
1932 n := p.oe[len(p.oe)-1]
1933 if n.Namespace == "" {
1934 return false
1935 }
1936 if mathMLTextIntegrationPoint(n) {
1937 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
1938 return false
1939 }
1940 if p.tok.Type == TextToken {
1941 return false
1942 }
1943 }
1944 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
1945 return false
1946 }
1947 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
1948 return false
1949 }
1950 if p.tok.Type == ErrorToken {
1951 return false
1952 }
1953 return true
1954}
1955
1956// parseImpliedToken parses a token as though it had appeared in the parser's
1957// input.
1958func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
1959 realToken, selfClosing := p.tok, p.hasSelfClosingToken
1960 p.tok = Token{
1961 Type: t,
1962 DataAtom: dataAtom,
1963 Data: data,
1964 }
1965 p.hasSelfClosingToken = false
1966 p.parseCurrentToken()
1967 p.tok, p.hasSelfClosingToken = realToken, selfClosing
1968}
1969
1970// parseCurrentToken runs the current token through the parsing routines
1971// until it is consumed.
1972func (p *parser) parseCurrentToken() {
1973 if p.tok.Type == SelfClosingTagToken {
1974 p.hasSelfClosingToken = true
1975 p.tok.Type = StartTagToken
1976 }
1977
1978 consumed := false
1979 for !consumed {
1980 if p.inForeignContent() {
1981 consumed = parseForeignContent(p)
1982 } else {
1983 consumed = p.im(p)
1984 }
1985 }
1986
1987 if p.hasSelfClosingToken {
1988 // This is a parse error, but ignore it.
1989 p.hasSelfClosingToken = false
1990 }
1991}
1992
1993func (p *parser) parse() error {
1994 // Iterate until EOF. Any other error will cause an early return.
1995 var err error
1996 for err != io.EOF {
1997 // CDATA sections are allowed only in foreign content.
1998 n := p.oe.top()
1999 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2000 // Read and parse the next token.
2001 p.tokenizer.Next()
2002 p.tok = p.tokenizer.Token()
2003 if p.tok.Type == ErrorToken {
2004 err = p.tokenizer.Err()
2005 if err != nil && err != io.EOF {
2006 return err
2007 }
2008 }
2009 p.parseCurrentToken()
2010 }
2011 return nil
2012}
2013
2014// Parse returns the parse tree for the HTML from the given Reader.
2015// The input is assumed to be UTF-8 encoded.
2016func Parse(r io.Reader) (*Node, error) {
2017 p := &parser{
2018 tokenizer: NewTokenizer(r),
2019 doc: &Node{
2020 Type: DocumentNode,
2021 },
2022 scripting: true,
2023 framesetOK: true,
2024 im: initialIM,
2025 }
2026 err := p.parse()
2027 if err != nil {
2028 return nil, err
2029 }
2030 return p.doc, nil
2031}
2032
2033// ParseFragment parses a fragment of HTML and returns the nodes that were
2034// found. If the fragment is the InnerHTML for an existing element, pass that
2035// element in context.
2036func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2037 contextTag := ""
2038 if context != nil {
2039 if context.Type != ElementNode {
2040 return nil, errors.New("html: ParseFragment of non-element Node")
2041 }
2042 // The next check isn't just context.DataAtom.String() == context.Data because
2043 // it is valid to pass an element whose tag isn't a known atom. For example,
2044 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2045 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2046 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2047 }
2048 contextTag = context.DataAtom.String()
2049 }
2050 p := &parser{
2051 tokenizer: NewTokenizerFragment(r, contextTag),
2052 doc: &Node{
2053 Type: DocumentNode,
2054 },
2055 scripting: true,
2056 fragment: true,
2057 context: context,
2058 }
2059
2060 root := &Node{
2061 Type: ElementNode,
2062 DataAtom: a.Html,
2063 Data: a.Html.String(),
2064 }
2065 p.doc.AppendChild(root)
2066 p.oe = nodeStack{root}
2067 p.resetInsertionMode()
2068
2069 for n := context; n != nil; n = n.Parent {
2070 if n.Type == ElementNode && n.DataAtom == a.Form {
2071 p.form = n
2072 break
2073 }
2074 }
2075
2076 err := p.parse()
2077 if err != nil {
2078 return nil, err
2079 }
2080
2081 parent := p.doc
2082 if context != nil {
2083 parent = root
2084 }
2085
2086 var result []*Node
2087 for c := parent.FirstChild; c != nil; {
2088 next := c.NextSibling
2089 parent.RemoveChild(c)
2090 result = append(result, c)
2091 c = next
2092 }
2093 return result, nil
2094}
diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go
new file mode 100644
index 0000000..d34564f
--- /dev/null
+++ b/vendor/golang.org/x/net/html/render.go
@@ -0,0 +1,271 @@
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bufio"
9 "errors"
10 "fmt"
11 "io"
12 "strings"
13)
14
// writer is the set of output methods the renderer relies on: plain writes,
// single-byte writes and string writes.
type writer interface {
	io.Writer
	io.ByteWriter
	WriteString(s string) (n int, err error)
}
20
21// Render renders the parse tree n to the given writer.
22//
23// Rendering is done on a 'best effort' basis: calling Parse on the output of
24// Render will always result in something similar to the original tree, but it
25// is not necessarily an exact clone unless the original tree was 'well-formed'.
26// 'Well-formed' is not easily specified; the HTML5 specification is
27// complicated.
28//
29// Calling Parse on arbitrary input typically results in a 'well-formed' parse
30// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
31// For example, in a 'well-formed' parse tree, no <a> element is a child of
32// another <a> element: parsing "<a><a>" results in two sibling elements.
33// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
34// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
35// children; the <a> is reparented to the <table>'s parent. However, calling
36// Parse on "<a><table><a>" does not return an error, but the result has an <a>
37// element with an <a> child, and is therefore not 'well-formed'.
38//
39// Programmatically constructed trees are typically also 'well-formed', but it
40// is possible to construct a tree that looks innocuous but, when rendered and
41// re-parsed, results in a different tree. A simple example is that a solitary
42// text node would become a tree containing <html>, <head> and <body> elements.
43// Another example is that the programmatic equivalent of "a<head>b</head>c"
44// becomes "<html><head><head/><body>abc</body></html>".
45func Render(w io.Writer, n *Node) error {
46 if x, ok := w.(writer); ok {
47 return render(x, n)
48 }
49 buf := bufio.NewWriter(w)
50 if err := render(buf, n); err != nil {
51 return err
52 }
53 return buf.Flush()
54}
55
56// plaintextAbort is returned from render1 when a <plaintext> element
57// has been rendered. No more end tags should be rendered after that.
58var plaintextAbort = errors.New("html: internal error (plaintext abort)")
59
60func render(w writer, n *Node) error {
61 err := render1(w, n)
62 if err == plaintextAbort {
63 err = nil
64 }
65 return err
66}
67
68func render1(w writer, n *Node) error {
69 // Render non-element nodes; these are the easy cases.
70 switch n.Type {
71 case ErrorNode:
72 return errors.New("html: cannot render an ErrorNode node")
73 case TextNode:
74 return escape(w, n.Data)
75 case DocumentNode:
76 for c := n.FirstChild; c != nil; c = c.NextSibling {
77 if err := render1(w, c); err != nil {
78 return err
79 }
80 }
81 return nil
82 case ElementNode:
83 // No-op.
84 case CommentNode:
85 if _, err := w.WriteString("<!--"); err != nil {
86 return err
87 }
88 if _, err := w.WriteString(n.Data); err != nil {
89 return err
90 }
91 if _, err := w.WriteString("-->"); err != nil {
92 return err
93 }
94 return nil
95 case DoctypeNode:
96 if _, err := w.WriteString("<!DOCTYPE "); err != nil {
97 return err
98 }
99 if _, err := w.WriteString(n.Data); err != nil {
100 return err
101 }
102 if n.Attr != nil {
103 var p, s string
104 for _, a := range n.Attr {
105 switch a.Key {
106 case "public":
107 p = a.Val
108 case "system":
109 s = a.Val
110 }
111 }
112 if p != "" {
113 if _, err := w.WriteString(" PUBLIC "); err != nil {
114 return err
115 }
116 if err := writeQuoted(w, p); err != nil {
117 return err
118 }
119 if s != "" {
120 if err := w.WriteByte(' '); err != nil {
121 return err
122 }
123 if err := writeQuoted(w, s); err != nil {
124 return err
125 }
126 }
127 } else if s != "" {
128 if _, err := w.WriteString(" SYSTEM "); err != nil {
129 return err
130 }
131 if err := writeQuoted(w, s); err != nil {
132 return err
133 }
134 }
135 }
136 return w.WriteByte('>')
137 default:
138 return errors.New("html: unknown node type")
139 }
140
141 // Render the <xxx> opening tag.
142 if err := w.WriteByte('<'); err != nil {
143 return err
144 }
145 if _, err := w.WriteString(n.Data); err != nil {
146 return err
147 }
148 for _, a := range n.Attr {
149 if err := w.WriteByte(' '); err != nil {
150 return err
151 }
152 if a.Namespace != "" {
153 if _, err := w.WriteString(a.Namespace); err != nil {
154 return err
155 }
156 if err := w.WriteByte(':'); err != nil {
157 return err
158 }
159 }
160 if _, err := w.WriteString(a.Key); err != nil {
161 return err
162 }
163 if _, err := w.WriteString(`="`); err != nil {
164 return err
165 }
166 if err := escape(w, a.Val); err != nil {
167 return err
168 }
169 if err := w.WriteByte('"'); err != nil {
170 return err
171 }
172 }
173 if voidElements[n.Data] {
174 if n.FirstChild != nil {
175 return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
176 }
177 _, err := w.WriteString("/>")
178 return err
179 }
180 if err := w.WriteByte('>'); err != nil {
181 return err
182 }
183
184 // Add initial newline where there is danger of a newline beging ignored.
185 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
186 switch n.Data {
187 case "pre", "listing", "textarea":
188 if err := w.WriteByte('\n'); err != nil {
189 return err
190 }
191 }
192 }
193
194 // Render any child nodes.
195 switch n.Data {
196 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
197 for c := n.FirstChild; c != nil; c = c.NextSibling {
198 if c.Type == TextNode {
199 if _, err := w.WriteString(c.Data); err != nil {
200 return err
201 }
202 } else {
203 if err := render1(w, c); err != nil {
204 return err
205 }
206 }
207 }
208 if n.Data == "plaintext" {
209 // Don't render anything else. <plaintext> must be the
210 // last element in the file, with no closing tag.
211 return plaintextAbort
212 }
213 default:
214 for c := n.FirstChild; c != nil; c = c.NextSibling {
215 if err := render1(w, c); err != nil {
216 return err
217 }
218 }
219 }
220
221 // Render the </xxx> closing tag.
222 if _, err := w.WriteString("</"); err != nil {
223 return err
224 }
225 if _, err := w.WriteString(n.Data); err != nil {
226 return err
227 }
228 return w.WriteByte('>')
229}
230
231// writeQuoted writes s to w surrounded by quotes. Normally it will use double
232// quotes, but if s contains a double quote, it will use single quotes.
233// It is used for writing the identifiers in a doctype declaration.
234// In valid HTML, they can't contain both types of quotes.
235func writeQuoted(w writer, s string) error {
236 var q byte = '"'
237 if strings.Contains(s, `"`) {
238 q = '\''
239 }
240 if err := w.WriteByte(q); err != nil {
241 return err
242 }
243 if _, err := w.WriteString(s); err != nil {
244 return err
245 }
246 if err := w.WriteByte(q); err != nil {
247 return err
248 }
249 return nil
250}
251
// voidElements is the set of void element tag names, as listed in
// Section 12.1.2, "Elements". Void elements are those that can't have any
// contents. A lookup for a name that is not in the set yields false.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true,
	"command": true, "embed": true, "hr": true, "img": true,
	"input": true, "keygen": true, "link": true, "meta": true,
	"param": true, "source": true, "track": true, "wbr": true,
}
diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go
new file mode 100644
index 0000000..893e272
--- /dev/null
+++ b/vendor/golang.org/x/net/html/token.go
@@ -0,0 +1,1219 @@
1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bytes"
9 "errors"
10 "io"
11 "strconv"
12 "strings"
13
14 "golang.org/x/net/html/atom"
15)
16
// A TokenType is the type of a Token.
type TokenType uint32

// The token types are numbered consecutively by iota, starting at zero with
// ErrorToken, so the zero value of TokenType is ErrorToken.
const (
	// ErrorToken means that an error occurred during tokenization.
	ErrorToken TokenType = iota
	// TextToken means a text node.
	TextToken
	// A StartTagToken looks like <a>.
	StartTagToken
	// An EndTagToken looks like </a>.
	EndTagToken
	// A SelfClosingTagToken tag looks like <br/>.
	SelfClosingTagToken
	// A CommentToken looks like <!--x-->.
	CommentToken
	// A DoctypeToken looks like <!DOCTYPE x>
	DoctypeToken
)

// ErrBufferExceeded means that the buffering limit was exceeded. The
// tokenizer's readByte reports it once the raw bytes of the current token
// reach the limit configured with SetMaxBuf.
var ErrBufferExceeded = errors.New("max buffer exceeded")
39
40// String returns a string representation of the TokenType.
41func (t TokenType) String() string {
42 switch t {
43 case ErrorToken:
44 return "Error"
45 case TextToken:
46 return "Text"
47 case StartTagToken:
48 return "StartTag"
49 case EndTagToken:
50 return "EndTag"
51 case SelfClosingTagToken:
52 return "SelfClosingTag"
53 case CommentToken:
54 return "Comment"
55 case DoctypeToken:
56 return "Doctype"
57 }
58 return "Invalid(" + strconv.Itoa(int(t)) + ")"
59}
60
// An Attribute is an attribute namespace-key-value triple. Namespace is
// non-empty for foreign attributes like xlink, Key is alphabetic (and hence
// does not contain escapable characters like '&', '<' or '>'), and Val is
// unescaped (it looks like "a<b" rather than "a&lt;b").
//
// Namespace is only used by the parser, not the tokenizer: Tokenizer.Token
// always leaves it empty.
type Attribute struct {
	Namespace, Key, Val string
}
70
// A Token consists of a TokenType and some Data (tag name for start and end
// tags, content for text, comments and doctypes). A tag Token may also contain
// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
// zero if Data is not a known tag name.
type Token struct {
	Type     TokenType   // kind of token
	DataAtom atom.Atom   // atom for Data on tag tokens; zero if unknown
	Data     string      // tag name, or text/comment/doctype content
	Attr     []Attribute // attributes of a tag token, in source order
}
82
83// tagString returns a string representation of a tag Token's Data and Attr.
84func (t Token) tagString() string {
85 if len(t.Attr) == 0 {
86 return t.Data
87 }
88 buf := bytes.NewBufferString(t.Data)
89 for _, a := range t.Attr {
90 buf.WriteByte(' ')
91 buf.WriteString(a.Key)
92 buf.WriteString(`="`)
93 escape(buf, a.Val)
94 buf.WriteByte('"')
95 }
96 return buf.String()
97}
98
99// String returns a string representation of the Token.
100func (t Token) String() string {
101 switch t.Type {
102 case ErrorToken:
103 return ""
104 case TextToken:
105 return EscapeString(t.Data)
106 case StartTagToken:
107 return "<" + t.tagString() + ">"
108 case EndTagToken:
109 return "</" + t.tagString() + ">"
110 case SelfClosingTagToken:
111 return "<" + t.tagString() + "/>"
112 case CommentToken:
113 return "<!--" + t.Data + "-->"
114 case DoctypeToken:
115 return "<!DOCTYPE " + t.Data + ">"
116 }
117 return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
118}
119
// span is a range of bytes in a Tokenizer's buffer. The start is inclusive,
// the end is exclusive. Spans are offsets rather than slices, so that they
// can be shifted when the Tokenizer moves data inside its buffer.
type span struct {
	start, end int
}
125
// A Tokenizer returns a stream of HTML Tokens.
//
// All positions it keeps (raw, data, pendingAttr, attr) are spans into buf.
type Tokenizer struct {
	// r is the source of the HTML text.
	r io.Reader
	// tt is the TokenType of the current token.
	tt TokenType
	// err is the first error encountered during tokenization. It is possible
	// for tt != ErrorToken && err != nil to hold: this means that Next
	// returned a valid token but the subsequent Next call will return an
	// error token.
	// For example, if the HTML text input was just "plain", then the first
	// Next call would set z.err to io.EOF but return a TextToken, and all
	// subsequent Next calls would return an ErrorToken.
	// err is never reset. Once it becomes non-nil, it stays non-nil.
	err error
	// readErr is the error returned by the io.Reader r. It is separate from
	// err because it is valid for an io.Reader to return (n int, err1 error)
	// such that n > 0 && err1 != nil, and callers should always process the
	// n > 0 bytes before considering the error err1.
	readErr error
	// buf[raw.start:raw.end] holds the raw bytes of the current token.
	// buf[raw.end:] is buffered input that will yield future tokens.
	raw span
	buf []byte
	// maxBuf limits the data buffered in buf. A value of 0 means unlimited.
	maxBuf int
	// buf[data.start:data.end] holds the raw bytes of the current token's data:
	// a text token's text, a tag token's tag name, etc.
	data span
	// pendingAttr is the attribute key and value currently being tokenized.
	// When complete, pendingAttr is pushed onto attr. nAttrReturned is
	// incremented on each call to TagAttr.
	pendingAttr   [2]span
	attr          [][2]span
	nAttrReturned int
	// rawTag is the "script" in "</script>" that closes the next token. If
	// non-empty, the subsequent call to Next will return a raw or RCDATA text
	// token: one that treats "<p>" as text instead of an element.
	// rawTag's contents are lower-cased.
	rawTag string
	// textIsRaw is whether the current text token's data is not escaped.
	textIsRaw bool
	// convertNUL is whether NUL bytes in the current token's data should
	// be converted into \ufffd replacement characters.
	convertNUL bool
	// allowCDATA is whether CDATA sections are allowed in the current context.
	allowCDATA bool
}
173
174// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as
175// the text "foo". The default value is false, which means to recognize it as
176// a bogus comment "<!-- [CDATA[foo]] -->" instead.
177//
178// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and
179// only if tokenizing foreign content, such as MathML and SVG. However,
180// tracking foreign-contentness is difficult to do purely in the tokenizer,
181// as opposed to the parser, due to HTML integration points: an <svg> element
182// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to-
183// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the
184// responsibility of the user of a tokenizer to call AllowCDATA as appropriate.
185// In practice, if using the tokenizer without caring whether MathML or SVG
186// CDATA is text or comments, such as tokenizing HTML to find all the anchor
187// text, it is acceptable to ignore this responsibility.
func (z *Tokenizer) AllowCDATA(allowCDATA bool) {
	// The flag is only recorded here. readMarkupDeclaration consults it when
	// a "<!" sequence turns out to be neither a comment nor a doctype.
	z.allowCDATA = allowCDATA
}
191
192// NextIsNotRawText instructs the tokenizer that the next token should not be
193// considered as 'raw text'. Some elements, such as script and title elements,
194// normally require the next token after the opening tag to be 'raw text' that
195// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>"
196// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and
197// an end tag token for "</title>". There are no distinct start tag or end tag
198// tokens for the "<b>" and "</b>".
199//
200// This tokenizer implementation will generally look for raw text at the right
201// times. Strictly speaking, an HTML5 compliant tokenizer should not look for
202// raw text if in foreign content: <title> generally needs raw text, but a
203// <title> inside an <svg> does not. Another example is that a <textarea>
204// generally needs raw text, but a <textarea> is not allowed as an immediate
205// child of a <select>; in normal parsing, a <textarea> implies </select>, but
206// one cannot close the implicit element when parsing a <select>'s InnerHTML.
207// Similarly to AllowCDATA, tracking the correct moment to override raw-text-
208// ness is difficult to do purely in the tokenizer, as opposed to the parser.
209// For strict compliance with the HTML5 tokenization algorithm, it is the
210// responsibility of the user of a tokenizer to call NextIsNotRawText as
211// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this
212// responsibility for basic usage.
213//
214// Note that this 'raw text' concept is different from the one offered by the
215// Tokenizer.Raw method.
func (z *Tokenizer) NextIsNotRawText() {
	// Next takes its raw-text path only while rawTag is non-empty, so
	// clearing it is all that is needed.
	z.rawTag = ""
}
219
220// Err returns the error associated with the most recent ErrorToken token.
221// This is typically io.EOF, meaning the end of tokenization.
222func (z *Tokenizer) Err() error {
223 if z.tt != ErrorToken {
224 return nil
225 }
226 return z.err
227}
228
// readByte returns the next byte from the input stream, doing a buffered read
// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
// slice that holds all the bytes read so far for the current token.
// It sets z.err if the underlying reader returns an error.
// Pre-condition: z.err == nil.
//
// On failure it returns 0 with z.err set. When the failure is a read error or
// end of input, z.raw.end is left unchanged; when it is ErrBufferExceeded, the
// byte has already been consumed (z.raw.end was advanced) before the limit
// check fires.
func (z *Tokenizer) readByte() byte {
	if z.raw.end >= len(z.buf) {
		// Our buffer is exhausted and we have to read from z.r. Check if the
		// previous read resulted in an error.
		if z.readErr != nil {
			z.err = z.readErr
			return 0
		}
		// We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
		// z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
		// allocate a new buffer before the copy.
		c := cap(z.buf)
		d := z.raw.end - z.raw.start
		var buf1 []byte
		if 2*d > c {
			buf1 = make([]byte, d, 2*c)
		} else {
			buf1 = z.buf[:d]
		}
		copy(buf1, z.buf[z.raw.start:z.raw.end])
		if x := z.raw.start; x != 0 {
			// Adjust the data/attr spans to refer to the same contents after the copy.
			z.data.start -= x
			z.data.end -= x
			z.pendingAttr[0].start -= x
			z.pendingAttr[0].end -= x
			z.pendingAttr[1].start -= x
			z.pendingAttr[1].end -= x
			for i := range z.attr {
				z.attr[i][0].start -= x
				z.attr[i][0].end -= x
				z.attr[i][1].start -= x
				z.attr[i][1].end -= x
			}
		}
		z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
		// Now that we have copied the live bytes to the start of the buffer,
		// we read from z.r into the remainder.
		var n int
		n, z.readErr = readAtLeastOneByte(z.r, buf1[d:cap(buf1)])
		if n == 0 {
			// Nothing was read, so the reader's error is surfaced right away.
			// If n > 0, readErr is kept for the next refill instead.
			z.err = z.readErr
			return 0
		}
		z.buf = buf1[:d+n]
	}
	x := z.buf[z.raw.end]
	z.raw.end++
	// The limit applies to the raw length of the current token only.
	if z.maxBuf > 0 && z.raw.end-z.raw.start >= z.maxBuf {
		z.err = ErrBufferExceeded
		return 0
	}
	return x
}
288
// Buffered returns a slice containing data buffered but not yet tokenized.
// The slice aliases the Tokenizer's internal buffer (it is z.buf[z.raw.end:]),
// so it is not a copy.
func (z *Tokenizer) Buffered() []byte {
	return z.buf[z.raw.end:]
}
293
// readAtLeastOneByte reads from r into b, retrying while r.Read reports
// (0, nil), so that the caller never observes a read that made no progress
// and returned no error. After 100 consecutive empty reads it gives up and
// returns io.ErrNoProgress.
func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
	const maxAttempts = 100
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if n, err := r.Read(b); n != 0 || err != nil {
			return n, err
		}
	}
	return 0, io.ErrNoProgress
}
306
307// skipWhiteSpace skips past any white space.
308func (z *Tokenizer) skipWhiteSpace() {
309 if z.err != nil {
310 return
311 }
312 for {
313 c := z.readByte()
314 if z.err != nil {
315 return
316 }
317 switch c {
318 case ' ', '\n', '\r', '\t', '\f':
319 // No-op.
320 default:
321 z.raw.end--
322 return
323 }
324 }
325}
326
327// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
328// is typically something like "script" or "textarea".
329func (z *Tokenizer) readRawOrRCDATA() {
330 if z.rawTag == "script" {
331 z.readScript()
332 z.textIsRaw = true
333 z.rawTag = ""
334 return
335 }
336loop:
337 for {
338 c := z.readByte()
339 if z.err != nil {
340 break loop
341 }
342 if c != '<' {
343 continue loop
344 }
345 c = z.readByte()
346 if z.err != nil {
347 break loop
348 }
349 if c != '/' {
350 continue loop
351 }
352 if z.readRawEndTag() || z.err != nil {
353 break loop
354 }
355 }
356 z.data.end = z.raw.end
357 // A textarea's or title's RCDATA can contain escaped entities.
358 z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
359 z.rawTag = ""
360}
361
362// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
363// If it succeeds, it backs up the input position to reconsume the tag and
364// returns true. Otherwise it returns false. The opening "</" has already been
365// consumed.
366func (z *Tokenizer) readRawEndTag() bool {
367 for i := 0; i < len(z.rawTag); i++ {
368 c := z.readByte()
369 if z.err != nil {
370 return false
371 }
372 if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
373 z.raw.end--
374 return false
375 }
376 }
377 c := z.readByte()
378 if z.err != nil {
379 return false
380 }
381 switch c {
382 case ' ', '\n', '\r', '\t', '\f', '/', '>':
383 // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
384 z.raw.end -= 3 + len(z.rawTag)
385 return true
386 }
387 z.raw.end--
388 return false
389}
390
// readScript reads until the next </script> tag, following the byzantine
// rules for escaping/hiding the closing tag.
//
// It is written as a state machine: each label is one state, and a goto is a
// state transition. "z.raw.end--" un-reads the byte just read so that the
// next state sees it again. Every exit path sets z.data.end to the final read
// position via the deferred function.
func (z *Tokenizer) readScript() {
	defer func() {
		z.data.end = z.raw.end
	}()
	var c byte

	// Plain script text: look for '<'.
scriptData:
	c = z.readByte()
	if z.err != nil {
		return
	}
	if c == '<' {
		goto scriptDataLessThanSign
	}
	goto scriptData

	// Seen "<": either an end tag ("</") or the start of an escape ("<!").
scriptDataLessThanSign:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '/':
		goto scriptDataEndTagOpen
	case '!':
		goto scriptDataEscapeStart
	}
	z.raw.end--
	goto scriptData

	// Seen "</": stop if it is the closing script tag.
scriptDataEndTagOpen:
	if z.readRawEndTag() || z.err != nil {
		return
	}
	goto scriptData

	// Seen "<!": expect the first '-' of "<!--".
scriptDataEscapeStart:
	c = z.readByte()
	if z.err != nil {
		return
	}
	if c == '-' {
		goto scriptDataEscapeStartDash
	}
	z.raw.end--
	goto scriptData

	// Seen "<!-": expect the second '-' of "<!--".
scriptDataEscapeStartDash:
	c = z.readByte()
	if z.err != nil {
		return
	}
	if c == '-' {
		goto scriptDataEscapedDashDash
	}
	z.raw.end--
	goto scriptData

	// Inside "<!-- ...": escaped script text.
scriptDataEscaped:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataEscapedDash
	case '<':
		goto scriptDataEscapedLessThanSign
	}
	goto scriptDataEscaped

	// Escaped text, one '-' seen.
scriptDataEscapedDash:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataEscapedDashDash
	case '<':
		goto scriptDataEscapedLessThanSign
	}
	goto scriptDataEscaped

	// Escaped text, "--" seen: a '>' ends the escape.
scriptDataEscapedDashDash:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataEscapedDashDash
	case '<':
		goto scriptDataEscapedLessThanSign
	case '>':
		goto scriptData
	}
	goto scriptDataEscaped

	// Escaped text, '<' seen: an end tag or a nested "<script".
scriptDataEscapedLessThanSign:
	c = z.readByte()
	if z.err != nil {
		return
	}
	if c == '/' {
		goto scriptDataEscapedEndTagOpen
	}
	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
		goto scriptDataDoubleEscapeStart
	}
	z.raw.end--
	goto scriptData

	// Escaped text, "</" seen: stop if it is the closing script tag.
scriptDataEscapedEndTagOpen:
	if z.readRawEndTag() || z.err != nil {
		return
	}
	goto scriptDataEscaped

	// Escaped text, "<" plus a letter seen: check for a nested "<script".
scriptDataDoubleEscapeStart:
	// Un-read the letter so that the whole word is matched below.
	z.raw.end--
	for i := 0; i < len("script"); i++ {
		c = z.readByte()
		if z.err != nil {
			return
		}
		if c != "script"[i] && c != "SCRIPT"[i] {
			z.raw.end--
			goto scriptDataEscaped
		}
	}
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case ' ', '\n', '\r', '\t', '\f', '/', '>':
		goto scriptDataDoubleEscaped
	}
	z.raw.end--
	goto scriptDataEscaped

	// Inside a nested "<script" within an escape: double-escaped text.
scriptDataDoubleEscaped:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataDoubleEscapedDash
	case '<':
		goto scriptDataDoubleEscapedLessThanSign
	}
	goto scriptDataDoubleEscaped

	// Double-escaped text, one '-' seen.
scriptDataDoubleEscapedDash:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataDoubleEscapedDashDash
	case '<':
		goto scriptDataDoubleEscapedLessThanSign
	}
	goto scriptDataDoubleEscaped

	// Double-escaped text, "--" seen: a '>' ends the escape.
scriptDataDoubleEscapedDashDash:
	c = z.readByte()
	if z.err != nil {
		return
	}
	switch c {
	case '-':
		goto scriptDataDoubleEscapedDashDash
	case '<':
		goto scriptDataDoubleEscapedLessThanSign
	case '>':
		goto scriptData
	}
	goto scriptDataDoubleEscaped

	// Double-escaped text, '<' seen: only "</" is of interest.
scriptDataDoubleEscapedLessThanSign:
	c = z.readByte()
	if z.err != nil {
		return
	}
	if c == '/' {
		goto scriptDataDoubleEscapeEnd
	}
	z.raw.end--
	goto scriptDataDoubleEscaped

	// Double-escaped text, "</" seen: a "</script" here closes only the
	// nested script, so it is kept as text and reading continues.
scriptDataDoubleEscapeEnd:
	if z.readRawEndTag() {
		// readRawEndTag rewound to the start of the end tag; skip forward
		// over it again because it is part of the script text.
		z.raw.end += len("</script>")
		goto scriptDataEscaped
	}
	if z.err != nil {
		return
	}
	goto scriptDataDoubleEscaped
}
597
598// readComment reads the next comment token starting with "<!--". The opening
599// "<!--" has already been consumed.
600func (z *Tokenizer) readComment() {
601 z.data.start = z.raw.end
602 defer func() {
603 if z.data.end < z.data.start {
604 // It's a comment with no data, like <!-->.
605 z.data.end = z.data.start
606 }
607 }()
608 for dashCount := 2; ; {
609 c := z.readByte()
610 if z.err != nil {
611 // Ignore up to two dashes at EOF.
612 if dashCount > 2 {
613 dashCount = 2
614 }
615 z.data.end = z.raw.end - dashCount
616 return
617 }
618 switch c {
619 case '-':
620 dashCount++
621 continue
622 case '>':
623 if dashCount >= 2 {
624 z.data.end = z.raw.end - len("-->")
625 return
626 }
627 case '!':
628 if dashCount >= 2 {
629 c = z.readByte()
630 if z.err != nil {
631 z.data.end = z.raw.end
632 return
633 }
634 if c == '>' {
635 z.data.end = z.raw.end - len("--!>")
636 return
637 }
638 }
639 }
640 dashCount = 0
641 }
642}
643
644// readUntilCloseAngle reads until the next ">".
645func (z *Tokenizer) readUntilCloseAngle() {
646 z.data.start = z.raw.end
647 for {
648 c := z.readByte()
649 if z.err != nil {
650 z.data.end = z.raw.end
651 return
652 }
653 if c == '>' {
654 z.data.end = z.raw.end - len(">")
655 return
656 }
657 }
658}
659
660// readMarkupDeclaration reads the next token starting with "<!". It might be
661// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
662// "<!a bogus comment". The opening "<!" has already been consumed.
663func (z *Tokenizer) readMarkupDeclaration() TokenType {
664 z.data.start = z.raw.end
665 var c [2]byte
666 for i := 0; i < 2; i++ {
667 c[i] = z.readByte()
668 if z.err != nil {
669 z.data.end = z.raw.end
670 return CommentToken
671 }
672 }
673 if c[0] == '-' && c[1] == '-' {
674 z.readComment()
675 return CommentToken
676 }
677 z.raw.end -= 2
678 if z.readDoctype() {
679 return DoctypeToken
680 }
681 if z.allowCDATA && z.readCDATA() {
682 z.convertNUL = true
683 return TextToken
684 }
685 // It's a bogus comment.
686 z.readUntilCloseAngle()
687 return CommentToken
688}
689
690// readDoctype attempts to read a doctype declaration and returns true if
691// successful. The opening "<!" has already been consumed.
692func (z *Tokenizer) readDoctype() bool {
693 const s = "DOCTYPE"
694 for i := 0; i < len(s); i++ {
695 c := z.readByte()
696 if z.err != nil {
697 z.data.end = z.raw.end
698 return false
699 }
700 if c != s[i] && c != s[i]+('a'-'A') {
701 // Back up to read the fragment of "DOCTYPE" again.
702 z.raw.end = z.data.start
703 return false
704 }
705 }
706 if z.skipWhiteSpace(); z.err != nil {
707 z.data.start = z.raw.end
708 z.data.end = z.raw.end
709 return true
710 }
711 z.readUntilCloseAngle()
712 return true
713}
714
715// readCDATA attempts to read a CDATA section and returns true if
716// successful. The opening "<!" has already been consumed.
717func (z *Tokenizer) readCDATA() bool {
718 const s = "[CDATA["
719 for i := 0; i < len(s); i++ {
720 c := z.readByte()
721 if z.err != nil {
722 z.data.end = z.raw.end
723 return false
724 }
725 if c != s[i] {
726 // Back up to read the fragment of "[CDATA[" again.
727 z.raw.end = z.data.start
728 return false
729 }
730 }
731 z.data.start = z.raw.end
732 brackets := 0
733 for {
734 c := z.readByte()
735 if z.err != nil {
736 z.data.end = z.raw.end
737 return true
738 }
739 switch c {
740 case ']':
741 brackets++
742 case '>':
743 if brackets >= 2 {
744 z.data.end = z.raw.end - len("]]>")
745 return true
746 }
747 brackets = 0
748 default:
749 brackets = 0
750 }
751 }
752}
753
754// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
755// case-insensitively matches any element of ss.
756func (z *Tokenizer) startTagIn(ss ...string) bool {
757loop:
758 for _, s := range ss {
759 if z.data.end-z.data.start != len(s) {
760 continue loop
761 }
762 for i := 0; i < len(s); i++ {
763 c := z.buf[z.data.start+i]
764 if 'A' <= c && c <= 'Z' {
765 c += 'a' - 'A'
766 }
767 if c != s[i] {
768 continue loop
769 }
770 }
771 return true
772 }
773 return false
774}
775
776// readStartTag reads the next start tag token. The opening "<a" has already
777// been consumed, where 'a' means anything in [A-Za-z].
778func (z *Tokenizer) readStartTag() TokenType {
779 z.readTag(true)
780 if z.err != nil {
781 return ErrorToken
782 }
783 // Several tags flag the tokenizer's next token as raw.
784 c, raw := z.buf[z.data.start], false
785 if 'A' <= c && c <= 'Z' {
786 c += 'a' - 'A'
787 }
788 switch c {
789 case 'i':
790 raw = z.startTagIn("iframe")
791 case 'n':
792 raw = z.startTagIn("noembed", "noframes", "noscript")
793 case 'p':
794 raw = z.startTagIn("plaintext")
795 case 's':
796 raw = z.startTagIn("script", "style")
797 case 't':
798 raw = z.startTagIn("textarea", "title")
799 case 'x':
800 raw = z.startTagIn("xmp")
801 }
802 if raw {
803 z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
804 }
805 // Look for a self-closing token like "<br/>".
806 if z.err == nil && z.buf[z.raw.end-2] == '/' {
807 return SelfClosingTagToken
808 }
809 return StartTagToken
810}
811
812// readTag reads the next tag token and its attributes. If saveAttr, those
813// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
814// The opening "<a" or "</a" has already been consumed, where 'a' means anything
815// in [A-Za-z].
816func (z *Tokenizer) readTag(saveAttr bool) {
817 z.attr = z.attr[:0]
818 z.nAttrReturned = 0
819 // Read the tag name and attribute key/value pairs.
820 z.readTagName()
821 if z.skipWhiteSpace(); z.err != nil {
822 return
823 }
824 for {
825 c := z.readByte()
826 if z.err != nil || c == '>' {
827 break
828 }
829 z.raw.end--
830 z.readTagAttrKey()
831 z.readTagAttrVal()
832 // Save pendingAttr if saveAttr and that attribute has a non-empty key.
833 if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
834 z.attr = append(z.attr, z.pendingAttr)
835 }
836 if z.skipWhiteSpace(); z.err != nil {
837 break
838 }
839 }
840}
841
842// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
843// is positioned such that the first byte of the tag name (the "d" in "<div")
844// has already been consumed.
845func (z *Tokenizer) readTagName() {
846 z.data.start = z.raw.end - 1
847 for {
848 c := z.readByte()
849 if z.err != nil {
850 z.data.end = z.raw.end
851 return
852 }
853 switch c {
854 case ' ', '\n', '\r', '\t', '\f':
855 z.data.end = z.raw.end - 1
856 return
857 case '/', '>':
858 z.raw.end--
859 z.data.end = z.raw.end
860 return
861 }
862 }
863}
864
865// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
866// Precondition: z.err == nil.
867func (z *Tokenizer) readTagAttrKey() {
868 z.pendingAttr[0].start = z.raw.end
869 for {
870 c := z.readByte()
871 if z.err != nil {
872 z.pendingAttr[0].end = z.raw.end
873 return
874 }
875 switch c {
876 case ' ', '\n', '\r', '\t', '\f', '/':
877 z.pendingAttr[0].end = z.raw.end - 1
878 return
879 case '=', '>':
880 z.raw.end--
881 z.pendingAttr[0].end = z.raw.end
882 return
883 }
884 }
885}
886
887// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
888func (z *Tokenizer) readTagAttrVal() {
889 z.pendingAttr[1].start = z.raw.end
890 z.pendingAttr[1].end = z.raw.end
891 if z.skipWhiteSpace(); z.err != nil {
892 return
893 }
894 c := z.readByte()
895 if z.err != nil {
896 return
897 }
898 if c != '=' {
899 z.raw.end--
900 return
901 }
902 if z.skipWhiteSpace(); z.err != nil {
903 return
904 }
905 quote := z.readByte()
906 if z.err != nil {
907 return
908 }
909 switch quote {
910 case '>':
911 z.raw.end--
912 return
913
914 case '\'', '"':
915 z.pendingAttr[1].start = z.raw.end
916 for {
917 c := z.readByte()
918 if z.err != nil {
919 z.pendingAttr[1].end = z.raw.end
920 return
921 }
922 if c == quote {
923 z.pendingAttr[1].end = z.raw.end - 1
924 return
925 }
926 }
927
928 default:
929 z.pendingAttr[1].start = z.raw.end - 1
930 for {
931 c := z.readByte()
932 if z.err != nil {
933 z.pendingAttr[1].end = z.raw.end
934 return
935 }
936 switch c {
937 case ' ', '\n', '\r', '\t', '\f':
938 z.pendingAttr[1].end = z.raw.end - 1
939 return
940 case '>':
941 z.raw.end--
942 z.pendingAttr[1].end = z.raw.end
943 return
944 }
945 }
946 }
947}
948
// Next scans the next token and returns its type.
//
// The returned type is also stored in z.tt. Once z.err is set and no text is
// pending, Next returns ErrorToken. If the previous start tag set z.rawTag,
// the content up to the matching end tag (or to EOF for plaintext) is
// returned as a single TextToken.
func (z *Tokenizer) Next() TokenType {
	// The new token starts where the previous one ended.
	z.raw.start = z.raw.end
	z.data.start = z.raw.end
	z.data.end = z.raw.end
	if z.err != nil {
		z.tt = ErrorToken
		return z.tt
	}
	if z.rawTag != "" {
		if z.rawTag == "plaintext" {
			// Read everything up to EOF.
			for z.err == nil {
				z.readByte()
			}
			z.data.end = z.raw.end
			z.textIsRaw = true
		} else {
			z.readRawOrRCDATA()
		}
		// Empty raw content produces no token; fall through to normal
		// tokenization instead.
		if z.data.end > z.data.start {
			z.tt = TextToken
			z.convertNUL = true
			return z.tt
		}
	}
	z.textIsRaw = false
	z.convertNUL = false

loop:
	for {
		c := z.readByte()
		if z.err != nil {
			break loop
		}
		if c != '<' {
			continue loop
		}

		// Check if the '<' we have just read is part of a tag, comment
		// or doctype. If not, it's part of the accumulated text token.
		c = z.readByte()
		if z.err != nil {
			break loop
		}
		var tokenType TokenType
		switch {
		case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
			tokenType = StartTagToken
		case c == '/':
			tokenType = EndTagToken
		case c == '!' || c == '?':
			// We use CommentToken to mean any of "<!--actual comments-->",
			// "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
			tokenType = CommentToken
		default:
			// Reconsume the current character.
			z.raw.end--
			continue
		}

		// We have a non-text token, but we might have accumulated some text
		// before that. If so, we return the text first, and return the non-
		// text token on the subsequent call to Next.
		if x := z.raw.end - len("<a"); z.raw.start < x {
			z.raw.end = x
			z.data.end = x
			z.tt = TextToken
			return z.tt
		}
		switch tokenType {
		case StartTagToken:
			z.tt = z.readStartTag()
			return z.tt
		case EndTagToken:
			c = z.readByte()
			if z.err != nil {
				break loop
			}
			if c == '>' {
				// "</>" does not generate a token at all. Generate an empty comment
				// to allow passthrough clients to pick up the data using Raw.
				// Reset the tokenizer state and start again.
				z.tt = CommentToken
				return z.tt
			}
			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
				// Attributes on an end tag are read but not saved.
				z.readTag(false)
				if z.err != nil {
					z.tt = ErrorToken
				} else {
					z.tt = EndTagToken
				}
				return z.tt
			}
			// "</" followed by anything else is a bogus comment.
			z.raw.end--
			z.readUntilCloseAngle()
			z.tt = CommentToken
			return z.tt
		case CommentToken:
			if c == '!' {
				z.tt = z.readMarkupDeclaration()
				return z.tt
			}
			// "<?" starts a bogus comment that includes the '?'.
			z.raw.end--
			z.readUntilCloseAngle()
			z.tt = CommentToken
			return z.tt
		}
	}
	// Reading failed: return whatever was read as text, then the error on
	// the next call.
	if z.raw.start < z.raw.end {
		z.data.end = z.raw.end
		z.tt = TextToken
		return z.tt
	}
	z.tt = ErrorToken
	return z.tt
}
1067
// Raw returns the unmodified text of the current token. Calling Next, Token,
// Text, TagName or TagAttr may change the contents of the returned slice.
// The slice aliases the Tokenizer's internal buffer; it is not a copy.
func (z *Tokenizer) Raw() []byte {
	return z.buf[z.raw.start:z.raw.end]
}
1073
// convertNewlines rewrites every "\r\n" pair and every lone "\r" in s as a
// single "\n". The rewrite is done in place: the returned slice shares s's
// backing array and is shorter than s by one byte per "\r\n" pair.
func convertNewlines(s []byte) []byte {
	w := 0 // next write position; never ahead of the read position r
	for r := 0; r < len(s); r++ {
		c := s[r]
		if c == '\r' {
			c = '\n'
			// Swallow the '\n' of a "\r\n" pair.
			if r+1 < len(s) && s[r+1] == '\n' {
				r++
			}
		}
		s[w] = c
		w++
	}
	return s[:w]
}
1105
// nul and replacement are the operands of the NUL substitution done by Text:
// each NUL byte is replaced with the UTF-8 encoding of U+FFFD.
var (
	nul         = []byte("\x00")
	replacement = []byte("\ufffd")
)
1110
1111// Text returns the unescaped text of a text, comment or doctype token. The
1112// contents of the returned slice may change on the next call to Next.
1113func (z *Tokenizer) Text() []byte {
1114 switch z.tt {
1115 case TextToken, CommentToken, DoctypeToken:
1116 s := z.buf[z.data.start:z.data.end]
1117 z.data.start = z.raw.end
1118 z.data.end = z.raw.end
1119 s = convertNewlines(s)
1120 if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
1121 s = bytes.Replace(s, nul, replacement, -1)
1122 }
1123 if !z.textIsRaw {
1124 s = unescape(s, false)
1125 }
1126 return s
1127 }
1128 return nil
1129}
1130
1131// TagName returns the lower-cased name of a tag token (the `img` out of
1132// `<IMG SRC="foo">`) and whether the tag has attributes.
1133// The contents of the returned slice may change on the next call to Next.
1134func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
1135 if z.data.start < z.data.end {
1136 switch z.tt {
1137 case StartTagToken, EndTagToken, SelfClosingTagToken:
1138 s := z.buf[z.data.start:z.data.end]
1139 z.data.start = z.raw.end
1140 z.data.end = z.raw.end
1141 return lower(s), z.nAttrReturned < len(z.attr)
1142 }
1143 }
1144 return nil, false
1145}
1146
1147// TagAttr returns the lower-cased key and unescaped value of the next unparsed
1148// attribute for the current tag token and whether there are more attributes.
1149// The contents of the returned slices may change on the next call to Next.
1150func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
1151 if z.nAttrReturned < len(z.attr) {
1152 switch z.tt {
1153 case StartTagToken, SelfClosingTagToken:
1154 x := z.attr[z.nAttrReturned]
1155 z.nAttrReturned++
1156 key = z.buf[x[0].start:x[0].end]
1157 val = z.buf[x[1].start:x[1].end]
1158 return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
1159 }
1160 }
1161 return nil, nil, false
1162}
1163
1164// Token returns the next Token. The result's Data and Attr values remain valid
1165// after subsequent Next calls.
1166func (z *Tokenizer) Token() Token {
1167 t := Token{Type: z.tt}
1168 switch z.tt {
1169 case TextToken, CommentToken, DoctypeToken:
1170 t.Data = string(z.Text())
1171 case StartTagToken, SelfClosingTagToken, EndTagToken:
1172 name, moreAttr := z.TagName()
1173 for moreAttr {
1174 var key, val []byte
1175 key, val, moreAttr = z.TagAttr()
1176 t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
1177 }
1178 if a := atom.Lookup(name); a != 0 {
1179 t.DataAtom, t.Data = a, a.String()
1180 } else {
1181 t.DataAtom, t.Data = 0, string(name)
1182 }
1183 }
1184 return t
1185}
1186
// SetMaxBuf sets a limit on the amount of data buffered during tokenization.
// A value of 0 means unlimited. The limit is only enforced when it is
// positive, so a negative value also means unlimited.
func (z *Tokenizer) SetMaxBuf(n int) {
	z.maxBuf = n
}
1192
// NewTokenizer returns a new HTML Tokenizer for the given Reader.
// The input is assumed to be UTF-8 encoded.
//
// It is NewTokenizerFragment with an empty context tag.
func NewTokenizer(r io.Reader) *Tokenizer {
	return NewTokenizerFragment(r, "")
}
1198
1199// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
1200// tokenizing an existing element's InnerHTML fragment. contextTag is that
1201// element's tag, such as "div" or "iframe".
1202//
1203// For example, how the InnerHTML "a<b" is tokenized depends on whether it is
1204// for a <p> tag or a <script> tag.
1205//
1206// The input is assumed to be UTF-8 encoded.
1207func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
1208 z := &Tokenizer{
1209 r: r,
1210 buf: make([]byte, 0, 4096),
1211 }
1212 if contextTag != "" {
1213 switch s := strings.ToLower(contextTag); s {
1214 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
1215 z.rawTag = s
1216 }
1217 }
1218 return z
1219}