]>
Commit | Line | Data |
---|---|---|
7adcb81e IB |
1 | /** |
2 | * @license Copyright (c) 2003-2015, CKSource - Frederico Knabben. All rights reserved. | |
3 | * For licensing, see LICENSE.md or http://ckeditor.com/license | |
4 | */ | |
5 | ||
/**
 * An "event like" HTML tokenizer: it scans a string of HTML and reports each
 * token it finds through overridable callback methods.
 *
 *		var parser = new CKEDITOR.htmlParser();
 *		parser.onTagOpen = function( tagName, attributes, selfClosing ) {
 *			alert( tagName );
 *		};
 *		parser.parse( '<p>Some <b>text</b>.</p>' ); // Alerts 'p', 'b'.
 *
 * @class
 * @constructor Creates a htmlParser class instance.
 */
CKEDITOR.htmlParser = function() {
	// Private members are kept under the `_` property.
	var privates = {};

	// Tokenizer matching one closing tag, comment or opening tag per `exec()` call.
	// Capture groups:
	//   1 : tag name of a closing tag,
	//   2 : text of a comment,
	//   3 : tag name of an opening tag,
	//   4 : raw attributes part of an opening tag,
	//   5 : "/" when the opening tag is self-closing.
	// The regex is global (stateful), and each instance gets its own copy.
	privates.htmlPartsRegex = /<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))/g;

	this._ = privates;
};
23 | ||
( function() {
	// Matches one attribute at a time inside the attributes part of an opening tag.
	// Capture groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
	// 4 = unquoted value. The regex is global and shared by all parser instances.
	var attribsRegex = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
		// Attributes which, when given without a value, get their own name as the value.
		emptyAttribs = { checked: 1, compact: 1, declare: 1, defer: 1, disabled: 1, ismap: 1, multiple: 1, nohref: 1, noresize: 1, noshade: 1, nowrap: 1, readonly: 1, selected: 1 },
		hasOwn = Object.prototype.hasOwnProperty;

	// Builds the attributes object (lowercased name => value) from the raw
	// attributes part of an opening tag.
	function parseAttributes( attribsPart ) {
		var attribs = {},
			attribMatch, attName, attValue;

		if ( !attribsPart )
			return attribs;

		// The regex is stateful; make sure scanning starts at the beginning of
		// this string even if a previous scan was interrupted by an exception.
		attribsRegex.lastIndex = 0;

		while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) ) {
			attName = attribMatch[ 1 ].toLowerCase();
			attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

			// Own-property check: a plain lookup would also match names inherited
			// from Object.prototype (e.g. "constructor").
			if ( !attValue && hasOwn.call( emptyAttribs, attName ) )
				attribs[ attName ] = attName;
			else
				attribs[ attName ] = CKEDITOR.tools.htmlDecodeAttr( attValue );
		}

		return attribs;
	}

	CKEDITOR.htmlParser.prototype = {
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		parser.onTagOpen = function( tagName, attributes, selfClosing ) {
		 *			alert( tagName ); // e.g. 'b'
		 *		};
		 *		parser.parse( '<!-- Example --><b>Hello</b>' );
		 *
		 * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 * property in this object represents an attribute name (lowercased) and its
		 * value is the attribute value.
		 * @param {Boolean} selfClosing `true` if the tag closes itself, `false` if the tag doesn't.
		 */
		onTagOpen: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		parser.onTagClose = function( tagName ) {
		 *			alert( tagName ); // 'b'
		 *		};
		 *		parser.parse( '<!-- Example --><b>Hello</b>' );
		 *
		 * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
		 */
		onTagClose: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		parser.onText = function( text ) {
		 *			alert( text ); // 'Hello'
		 *		};
		 *		parser.parse( '<!-- Example --><b>Hello</b>' );
		 *
		 * @param {String} text The text found.
		 */
		onText: function() {},

		/**
		 * Function to be fired when a CDATA section is found. This function
		 * should be overridden when using this class.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		parser.onCDATA = function( cdata ) {
		 *			alert( cdata ); // 'var hello;'
		 *		};
		 *		parser.parse( '<script>var hello;</script>' );
		 *
		 * @param {String} cdata The CDATA content that was found.
		 */
		onCDATA: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		parser.onComment = function( comment ) {
		 *			alert( comment ); // ' Example '
		 *		};
		 *		parser.parse( '<!-- Example --><b>Hello</b>' );
		 *
		 * @param {String} comment The comment text.
		 */
		onComment: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 *
		 *		var parser = new CKEDITOR.htmlParser();
		 *		// The onTagOpen, onTagClose, onText, onCDATA and onComment
		 *		// functions should be overridden at this point.
		 *		parser.parse( '<!-- Example --><b>Hello</b>' );
		 *
		 * @param {String} html The HTML to be parsed.
		 */
		parse: function( html ) {
			var partsRegex = this._.htmlPartsRegex,
				parts, tagName, tagIndex, text,
				nextIndex = 0,
				cdata; // The collected data inside a CDATA section.

			// The regex is global and therefore stateful. Start from the beginning
			// even if a previous parse was aborted by an exception in a callback.
			partsRegex.lastIndex = 0;

			while ( ( parts = partsRegex.exec( html ) ) ) {
				tagIndex = parts.index;

				// Anything between the previous token and this one is plain text.
				if ( tagIndex > nextIndex ) {
					text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				// "parts" is an array with the following items:
				//		0 : The entire match for opening/closing tags and comments.
				//		1 : Group filled with the tag name for closing tags.
				//		2 : Group filled with the comment text.
				//		3 : Group filled with the tag name for opening tags.
				//		4 : Group filled with the attributes part of opening tags.
				//		5 : Group filled with "/" for self-closing opening tags.

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) ) {
					tagName = tagName.toLowerCase();

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] ) {
						// Send the CDATA data.
						this.onCDATA( cdata.join( '' ) );
						cdata = null;
					}

					// While still in CDATA mode, the closing tag is not reported;
					// it falls through and is stored as raw CDATA content below.
					if ( !cdata ) {
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata ) {
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) ) {
					tagName = tagName.toLowerCase();

					// There are some tag names that can break things, so let's
					// simply ignore them when parsing. (#5224)
					if ( /="/.test( tagName ) )
						continue;

					this.onTagOpen( tagName, parseAttributes( parts[ 4 ] ), !!parts[ 5 ] );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment. Note that an empty comment ("<!---->") yields an empty
				// string here and is therefore not reported.
				if ( ( tagName = parts[ 2 ] ) )
					this.onComment( tagName );
			}

			// Whatever is left after the last token is reported as text.
			if ( html.length > nextIndex )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
} )();