1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 // parseDoctype parses the data from a DoctypeToken into a name,
12 // public identifier, and system identifier. It returns a Node whose Type
13 // is DoctypeNode, whose Data is the lower-cased name, and which has attributes
14 // named "system" and "public" for the two identifiers if they were present.
15 // quirks reports whether the document should be parsed in "quirks mode".
16 func parseDoctype(s string) (n *Node, quirks bool) {
17 n = &Node{Type: DoctypeNode}
// space is the index of the first whitespace character in s.
20 space := strings.IndexAny(s, whitespace)
25 // The comparison to "html" is case-sensitive.
// The name is stored in lower case regardless of how it was written.
29 n.Data = strings.ToLower(n.Data)
// Drop everything before the first whitespace, then the whitespace run itself.
30 s = strings.TrimLeft(s[space:], whitespace)
33 // It can't start with "PUBLIC" or "SYSTEM".
34 // Ignore the rest of the string.
// Any non-empty remainder also counts as quirks.
35 return n, quirks || s != ""
// The first six bytes, lower-cased, are the candidate keyword.
38 key := strings.ToLower(s[:6])
// Keep going for as long as key names one of the two identifier keywords.
40 for key == "public" || key == "system" {
41 s = strings.TrimLeft(s, whitespace)
// Only a single or double quote is accepted as the opening delimiter.
46 if quote != '"' && quote != '\'' {
// Look for the closing quote: the next occurrence of the same quote character.
50 q := strings.IndexRune(s, rune(quote))
// Record the identifier as an attribute named after the keyword it followed.
59 n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
// Either key is still set or unparsed input remains after the loop.
67 if key != "" || s != "" {
69 } else if len(n.Attr) > 0 {
70 if n.Attr[0].Key == "public" {
// The public identifier is matched case-insensitively: it is lower-cased
// here and compared against lower-case literals below.
71 public := strings.ToLower(n.Attr[0].Val)
// NOTE(review): the second literal spells "w3d". The WHATWG HTML parsing
// spec appears to list "-/W3C/DTD HTML 4.0 Transitional/EN" for this check,
// so this may be a typo for "w3c" - confirm against the spec before changing.
73 case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
// Prefix-match public against every entry in quirkyIDs.
76 for _, q := range quirkyIDs {
77 if strings.HasPrefix(public, q) {
83 // The following two public IDs only cause quirks mode if there is no system ID.
84 if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
85 strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
// Check whether the last attribute is a system identifier equal, ignoring
// case, to this specific IBM DTD URL.
89 if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
90 strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
98 // quirkyIDs is a list of public doctype identifiers that cause a document
99 // to be interpreted in quirks mode. The identifiers should be in lower case.
100 var quirkyIDs = []string{
101 "+//silmaril//dtd html pro v0r11 19970101//",
102 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
103 "-//as//dtd html 3.0 aswedit + extensions//",
104 "-//ietf//dtd html 2.0 level 1//",
105 "-//ietf//dtd html 2.0 level 2//",
106 "-//ietf//dtd html 2.0 strict level 1//",
107 "-//ietf//dtd html 2.0 strict level 2//",
108 "-//ietf//dtd html 2.0 strict//",
109 "-//ietf//dtd html 2.0//",
110 "-//ietf//dtd html 2.1e//",
111 "-//ietf//dtd html 3.0//",
112 "-//ietf//dtd html 3.2 final//",
113 "-//ietf//dtd html 3.2//",
114 "-//ietf//dtd html 3//",
115 "-//ietf//dtd html level 0//",
116 "-//ietf//dtd html level 1//",
117 "-//ietf//dtd html level 2//",
118 "-//ietf//dtd html level 3//",
119 "-//ietf//dtd html strict level 0//",
120 "-//ietf//dtd html strict level 1//",
121 "-//ietf//dtd html strict level 2//",
122 "-//ietf//dtd html strict level 3//",
123 "-//ietf//dtd html strict//",
124 "-//ietf//dtd html//",
125 "-//metrius//dtd metrius presentational//",
126 "-//microsoft//dtd internet explorer 2.0 html strict//",
127 "-//microsoft//dtd internet explorer 2.0 html//",
128 "-//microsoft//dtd internet explorer 2.0 tables//",
129 "-//microsoft//dtd internet explorer 3.0 html strict//",
130 "-//microsoft//dtd internet explorer 3.0 html//",
131 "-//microsoft//dtd internet explorer 3.0 tables//",
132 "-//netscape comm. corp.//dtd html//",
133 "-//netscape comm. corp.//dtd strict html//",
134 "-//o'reilly and associates//dtd html 2.0//",
135 "-//o'reilly and associates//dtd html extended 1.0//",
136 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
137 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
138 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
139 "-//spyglass//dtd html 2.0 extended//",
140 "-//sq//dtd html 2.0 hotmetal + extensions//",
141 "-//sun microsystems corp.//dtd hotjava html//",
142 "-//sun microsystems corp.//dtd hotjava strict html//",
143 "-//w3c//dtd html 3 1995-03-24//",
144 "-//w3c//dtd html 3.2 draft//",
145 "-//w3c//dtd html 3.2 final//",
146 "-//w3c//dtd html 3.2//",
147 "-//w3c//dtd html 3.2s draft//",
148 "-//w3c//dtd html 4.0 frameset//",
149 "-//w3c//dtd html 4.0 transitional//",
150 "-//w3c//dtd html experimental 19960712//",
151 "-//w3c//dtd html experimental 970421//",
152 "-//w3c//dtd w3 html//",
153 "-//w3o//dtd w3 html 3.0//",
154 "-//webtechs//dtd mozilla html 2.0//",
155 "-//webtechs//dtd mozilla html//",