|
1
|
|
|
;(function (sax) { // wrapper for non-node envs |
|
2
|
|
|
// Public API surface: a factory for parsers plus the constructor and stream
// entry points that are defined as function declarations in this file.
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
sax.SAXParser = SAXParser
sax.SAXStream = SAXStream
sax.createStream = createStream

// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
// since that's the earliest that a buffer overrun could occur. This way, checks are
// as rare as required, but as often as necessary to ensure never crossing this bound.
// Furthermore, buffers are only tested at most once per write(), so passing a very
// large string into write() might have undesirable effects, but this is manageable by
// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
// edge case, result in creating at most one complete copy of the string passed in.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024

// Names of the string accumulators kept on every parser instance.
// clearBuffers() resets each of them and checkBufferLength() measures each of them.
var buffers = [
  'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
  'procInstName', 'procInstBody', 'entity', 'attribName',
  'attribValue', 'cdata', 'script'
]

// Event names the parser can emit. A handler for event X is looked up on the
// parser as the property 'on' + X (see emit()).
sax.EVENTS = [
  'text',
  'processinginstruction',
  'sgmldeclaration',
  'doctype',
  'comment',
  'opentagstart',
  'attribute',
  'opentag',
  'closetag',
  'opencdata',
  'cdata',
  'closecdata',
  'error',
  'end',
  'ready',
  'script',
  'opennamespace',
  'closenamespace'
]

// Parser constructor. Also used as an initialiser: end() re-invokes it with
// SAXParser.call(parser, ...) to reset an existing instance.
//
// strict - truthy enables strict mode (and implies noscript).
// opt    - optional options object. It is stored by reference and written to
//          (lowercase, unquotedAttributeValues), so the caller's object is mutated.
//
// Works with or without `new`. Emits 'onready' once initialised.
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt)
  }

  var parser = this
  clearBuffers(parser)
  parser.q = parser.c = ''
  parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
  parser.opt = opt || {}
  // `lowercasetags` is accepted as an alias for `lowercase`
  parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
  // name of the String method applied to tag/attribute names in non-strict mode
  parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
  parser.tags = []
  parser.closed = parser.closedRoot = parser.sawRoot = false
  parser.tag = parser.error = null
  parser.strict = !!strict
  parser.noscript = !!(strict || parser.opt.noscript)
  parser.state = S.BEGIN
  parser.strictEntities = parser.opt.strictEntities
  // per-parser entity table that inherits from the shared one
  parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
  parser.attribList = []

  // namespaces form a prototype chain.
  // it always points at the current tag,
  // which protos to its parent tag.
  if (parser.opt.xmlns) {
    parser.ns = Object.create(rootNS)
  }

  // disallow unquoted attribute values if not otherwise configured
  // and strict mode is true
  if (parser.opt.unquotedAttributeValues === undefined) {
    parser.opt.unquotedAttributeValues = !strict
  }

  // mostly just for error reporting
  parser.trackPosition = parser.opt.position !== false
  if (parser.trackPosition) {
    parser.position = parser.line = parser.column = 0
  }
  emit(parser, 'onready')
}

// Fallback for environments without Object.create.
// Only the prototype argument is honoured; a second (property descriptors)
// argument is ignored by this fallback.
if (!Object.create) {
  Object.create = function (o) {
    function F () {}
    F.prototype = o
    return new F()
  }
}

// Fallback for environments without Object.keys: collects own enumerable keys.
if (!Object.keys) {
  Object.keys = function (o) {
    var a = []
    // call through Object.prototype so objects that shadow or lack
    // hasOwnProperty (e.g. a map with a key of that name) still work
    for (var i in o) if (Object.prototype.hasOwnProperty.call(o, i)) a.push(i)
    return a
  }
}

// Measures every buffer named in `buffers` and reacts to any that exceed the limit,
// then schedules the next check by setting parser.bufferCheckPosition.
//
// Oversized textNode / cdata / script buffers are emitted early and reset; any
// other oversized buffer is reported through error().
function checkBufferLength (parser) {
  // the enforced limit never drops below 10
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var maxActual = 0
  for (var i = 0, l = buffers.length; i < l; i++) {
    var len = parser[buffers[i]].length
    if (len > maxAllowed) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the text node now,
      // so at least it won't get any bigger.
      switch (buffers[i]) {
        case 'textNode':
          closeText(parser)
          break

        case 'cdata':
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
          break

        case 'script':
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
          break

        default:
          error(parser, 'Max buffer length exceeded: ' + buffers[i])
      }
    }
    // `len` is the length measured before any flush above
    maxActual = Math.max(maxActual, len)
  }
  // schedule the next check for the earliest possible buffer overrun.
  var m = sax.MAX_BUFFER_LENGTH - maxActual
  parser.bufferCheckPosition = m + parser.position
}

// Resets every buffer listed in `buffers` to the empty string on `parser`.
function clearBuffers (parser) {
  for (var i = 0, l = buffers.length; i < l; i++) {
    parser[buffers[i]] = ''
  }
}

// Emits whatever is currently buffered as text, cdata and script, then clears
// those buffers. cdata/script events are only emitted when non-empty.
function flushBuffers (parser) {
  closeText(parser)
  if (parser.cdata !== '') {
    emitNode(parser, 'oncdata', parser.cdata)
    parser.cdata = ''
  }
  if (parser.script !== '') {
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }
}

// Instance methods. The prototype object is replaced wholesale, so it does not
// carry a `constructor` property.
SAXParser.prototype = {
  // finish parsing; delegates to the module-level end()
  end: function () { end(this) },
  write: write,
  // clear a recorded error so parsing can continue
  resume: function () { this.error = null; return this },
  // closing is signalled by writing null
  close: function () { return this.write(null) },
  // emit any buffered text/cdata/script now
  flush: function () { flushBuffers(this) }
}

// Resolve a Stream base class. Where require('stream') throws or yields nothing
// (non-node environments), fall back to an empty constructor so SAXStream can
// still be defined.
var Stream
try {
  Stream = require('stream').Stream
} catch (ex) {
  Stream = function () {}
}
if (!Stream) Stream = function () {}

// Parser events that SAXStream forwards generically. 'error' and 'end' are
// excluded because SAXStream installs dedicated handlers for them.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return ev !== 'error' && ev !== 'end'
})

// Factory equivalent to `new SAXStream(strict, opt)`.
function createStream (strict, opt) {
  return new SAXStream(strict, opt)
}

// Stream wrapper around a SAXParser. Works with or without `new`.
//
// strict, opt - passed straight to the SAXParser constructor.
//
// The parser's 'end' and 'error' events are re-emitted on the stream. For each
// event in streamWraps an `on<event>` accessor is defined on the instance:
// reading it returns the parser's handler; assigning a handler registers it via
// this.on(); assigning a falsy value removes all listeners for that event.
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt)
  }

  Stream.apply(this)

  this._parser = new SAXParser(strict, opt)
  this.writable = true
  this.readable = true

  var me = this

  this._parser.onend = function () {
    me.emit('end')
  }

  this._parser.onerror = function (er) {
    me.emit('error', er)

    // if didn't throw, then means error was handled.
    // go ahead and clear error, so we can write again.
    me._parser.error = null
  }

  // created lazily by write() the first time a Buffer is written
  this._decoder = null

  streamWraps.forEach(function (ev) {
    Object.defineProperty(me, 'on' + ev, {
      get: function () {
        return me._parser['on' + ev]
      },
      set: function (h) {
        if (!h) {
          me.removeAllListeners(ev)
          me._parser['on' + ev] = h
          return h
        }
        me.on(ev, h)
      },
      enumerable: true,
      configurable: false
    })
  })
}

// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property via the descriptor argument.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: {
    value: SAXStream
  }
})

// Feeds a chunk to the parser and re-emits it as a 'data' event.
//
// data - a string, or (where Buffer exists) a Buffer, which is decoded as UTF-8
//        through a lazily created StringDecoder kept on this._decoder.
//
// Always returns true.
SAXStream.prototype.write = function (data) {
  if (typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function' &&
    Buffer.isBuffer(data)) {
    if (!this._decoder) {
      var SD = require('string_decoder').StringDecoder
      this._decoder = new SD('utf8')
    }
    data = this._decoder.write(data)
  }

  this._parser.write(data.toString())
  // for Buffer input the 'data' payload is the decoded string
  this.emit('data', data)
  return true
}

// Ends the stream: writes the optional final chunk, flushes the decoder, then
// ends the underlying parser.
//
// chunk - optional last chunk; ignored when falsy or empty.
//
// Always returns true.
SAXStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) {
    this.write(chunk)
  }
  // A decoder only exists if Buffers were written. It may still be holding the
  // bytes of an incomplete multi-byte sequence; end() returns them as a string
  // so they reach the parser instead of being silently dropped.
  if (this._decoder) {
    var rest = this._decoder.end()
    if (rest) {
      this.write(rest)
    }
  }
  this._parser.end()
  return true
}

// Registers a listener. The first time a listener is added for an event listed
// in streamWraps (and the parser has no handler for it yet), a forwarding
// handler is installed on the parser that re-emits the event on this stream.
//
// Returns whatever Stream.prototype.on returns.
SAXStream.prototype.on = function (ev, handler) {
  var me = this
  if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) {
    me._parser['on' + ev] = function () {
      // a single argument is wrapped by hand: Array.apply(null, [n]) with a
      // lone number would build an array of that length instead
      var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)
      args.splice(0, 0, ev)
      me.emit.apply(me, args)
    }
  }

  return Stream.prototype.on.call(me, ev, handler)
}

// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
var CDATA = '[CDATA['
var DOCTYPE = 'DOCTYPE'
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// prefixes bound at the root of every namespace chain (see SAXParser)
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings, a single character at a time
// as such, it cannot ever support astral-plane characters (10000-EFFFF)
// without a significant breaking change to either this parser, or the
// JavaScript language.  Implementation of an emoji-capable xml parser
// is left as an exercise for the reader.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/

var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/

// same classes as above, additionally accepting '#' (numeric character references)
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/

// True when c is a space, line feed, carriage return or tab.
function isWhitespace (c) {
  return c === ' ' || c === '\n' || c === '\r' || c === '\t'
}

// True when c is a double or single quote character.
function isQuote (c) {
  return c === '"' || c === '\''
}

// True when c is '>' or whitespace.
function isAttribEnd (c) {
  return c === '>' || isWhitespace(c)
}

// True when `regex` matches `c` (regex.test).
function isMatch (regex, c) {
  return regex.test(c)
}

// Negation of isMatch.
function notMatch (regex, c) {
  return !isMatch(regex, c)
}

// Parser states. S is used here as a counter so every state gets the next
// integer, starting at 0.
var S = 0
sax.STATE = {
  BEGIN: S++, // leading byte order mark or whitespace
  BEGIN_WHITESPACE: S++, // leading whitespace
  TEXT: S++, // general stuff
  TEXT_ENTITY: S++, // &amp and such.
  OPEN_WAKA: S++, // <
  SGML_DECL: S++, // <!BLARG
  SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
  DOCTYPE: S++, // <!DOCTYPE
  DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
  DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
  DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
  COMMENT_STARTING: S++, // <!-
  COMMENT: S++, // <!--
  COMMENT_ENDING: S++, // <!-- blah -
  COMMENT_ENDED: S++, // <!-- blah --
  CDATA: S++, // <![CDATA[ something
  CDATA_ENDING: S++, // ]
  CDATA_ENDING_2: S++, // ]]
  PROC_INST: S++, // <?hi
  PROC_INST_BODY: S++, // <?hi there
  PROC_INST_ENDING: S++, // <?hi "there" ?
  OPEN_TAG: S++, // <strong
  OPEN_TAG_SLASH: S++, // <strong /
  ATTRIB: S++, // <a
  ATTRIB_NAME: S++, // <a foo
  ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
  ATTRIB_VALUE: S++, // <a foo=
  ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
  ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
  ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
  ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
  ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
  CLOSE_TAG: S++, // </a
  CLOSE_TAG_SAW_WHITE: S++, // </a   >
  SCRIPT: S++, // <script> ...
  SCRIPT_ENDING: S++ // <script> ... <
}

// The five predefined XML entities. SAXParser uses this table as the prototype
// of parser.ENTITIES when opt.strictEntities is set.
sax.XML_ENTITIES = {
  'amp': '&',
  'gt': '>',
  'lt': '<',
  'quot': '"',
  'apos': "'"
}

// Named entity table used when opt.strictEntities is not set. Values are
// either the replacement string itself or a character code; the codes are
// converted to strings by the normalisation pass that follows this table.
sax.ENTITIES = {
  'amp': '&',
  'gt': '>',
  'lt': '<',
  'quot': '"',
  'apos': "'",
  'AElig': 198,
  'Aacute': 193,
  'Acirc': 194,
  'Agrave': 192,
  'Aring': 197,
  'Atilde': 195,
  'Auml': 196,
  'Ccedil': 199,
  'ETH': 208,
  'Eacute': 201,
  'Ecirc': 202,
  'Egrave': 200,
  'Euml': 203,
  'Iacute': 205,
  'Icirc': 206,
  'Igrave': 204,
  'Iuml': 207,
  'Ntilde': 209,
  'Oacute': 211,
  'Ocirc': 212,
  'Ograve': 210,
  'Oslash': 216,
  'Otilde': 213,
  'Ouml': 214,
  'THORN': 222,
  'Uacute': 218,
  'Ucirc': 219,
  'Ugrave': 217,
  'Uuml': 220,
  'Yacute': 221,
  'aacute': 225,
  'acirc': 226,
  'aelig': 230,
  'agrave': 224,
  'aring': 229,
  'atilde': 227,
  'auml': 228,
  'ccedil': 231,
  'eacute': 233,
  'ecirc': 234,
  'egrave': 232,
  'eth': 240,
  'euml': 235,
  'iacute': 237,
  'icirc': 238,
  'igrave': 236,
  'iuml': 239,
  'ntilde': 241,
  'oacute': 243,
  'ocirc': 244,
  'ograve': 242,
  'oslash': 248,
  'otilde': 245,
  'ouml': 246,
  'szlig': 223,
  'thorn': 254,
  'uacute': 250,
  'ucirc': 251,
  'ugrave': 249,
  'uuml': 252,
  'yacute': 253,
  'yuml': 255,
  'copy': 169,
  'reg': 174,
  'nbsp': 160,
  'iexcl': 161,
  'cent': 162,
  'pound': 163,
  'curren': 164,
  'yen': 165,
  'brvbar': 166,
  'sect': 167,
  'uml': 168,
  'ordf': 170,
  'laquo': 171,
  'not': 172,
  'shy': 173,
  'macr': 175,
  'deg': 176,
  'plusmn': 177,
  'sup1': 185,
  'sup2': 178,
  'sup3': 179,
  'acute': 180,
  'micro': 181,
  'para': 182,
  'middot': 183,
  'cedil': 184,
  'ordm': 186,
  'raquo': 187,
  'frac14': 188,
  'frac12': 189,
  'frac34': 190,
  'iquest': 191,
  'times': 215,
  'divide': 247,
  'OElig': 338,
  'oelig': 339,
  'Scaron': 352,
  'scaron': 353,
  'Yuml': 376,
  'fnof': 402,
  'circ': 710,
  'tilde': 732,
  'Alpha': 913,
  'Beta': 914,
  'Gamma': 915,
  'Delta': 916,
  'Epsilon': 917,
  'Zeta': 918,
  'Eta': 919,
  'Theta': 920,
  'Iota': 921,
  'Kappa': 922,
  'Lambda': 923,
  'Mu': 924,
  'Nu': 925,
  'Xi': 926,
  'Omicron': 927,
  'Pi': 928,
  'Rho': 929,
  'Sigma': 931,
  'Tau': 932,
  'Upsilon': 933,
  'Phi': 934,
  'Chi': 935,
  'Psi': 936,
  'Omega': 937,
  'alpha': 945,
  'beta': 946,
  'gamma': 947,
  'delta': 948,
  'epsilon': 949,
  'zeta': 950,
  'eta': 951,
  'theta': 952,
  'iota': 953,
  'kappa': 954,
  'lambda': 955,
  'mu': 956,
  'nu': 957,
  'xi': 958,
  'omicron': 959,
  'pi': 960,
  'rho': 961,
  'sigmaf': 962,
  'sigma': 963,
  'tau': 964,
  'upsilon': 965,
  'phi': 966,
  'chi': 967,
  'psi': 968,
  'omega': 969,
  'thetasym': 977,
  'upsih': 978,
  'piv': 982,
  'ensp': 8194,
  'emsp': 8195,
  'thinsp': 8201,
  'zwnj': 8204,
  'zwj': 8205,
  'lrm': 8206,
  'rlm': 8207,
  'ndash': 8211,
  'mdash': 8212,
  'lsquo': 8216,
  'rsquo': 8217,
  'sbquo': 8218,
  'ldquo': 8220,
  'rdquo': 8221,
  'bdquo': 8222,
  'dagger': 8224,
  'Dagger': 8225,
  'bull': 8226,
  'hellip': 8230,
  'permil': 8240,
  'prime': 8242,
  'Prime': 8243,
  'lsaquo': 8249,
  'rsaquo': 8250,
  'oline': 8254,
  'frasl': 8260,
  'euro': 8364,
  'image': 8465,
  'weierp': 8472,
  'real': 8476,
  'trade': 8482,
  'alefsym': 8501,
  'larr': 8592,
  'uarr': 8593,
  'rarr': 8594,
  'darr': 8595,
  'harr': 8596,
  'crarr': 8629,
  'lArr': 8656,
  'uArr': 8657,
  'rArr': 8658,
  'dArr': 8659,
  'hArr': 8660,
  'forall': 8704,
  'part': 8706,
  'exist': 8707,
  'empty': 8709,
  'nabla': 8711,
  'isin': 8712,
  'notin': 8713,
  'ni': 8715,
  'prod': 8719,
  'sum': 8721,
  'minus': 8722,
  'lowast': 8727,
  'radic': 8730,
  'prop': 8733,
  'infin': 8734,
  'ang': 8736,
  'and': 8743,
  'or': 8744,
  'cap': 8745,
  'cup': 8746,
  'int': 8747,
  'there4': 8756,
  'sim': 8764,
  'cong': 8773,
  'asymp': 8776,
  'ne': 8800,
  'equiv': 8801,
  'le': 8804,
  'ge': 8805,
  'sub': 8834,
  'sup': 8835,
  'nsub': 8836,
  'sube': 8838,
  'supe': 8839,
  'oplus': 8853,
  'otimes': 8855,
  'perp': 8869,
  'sdot': 8901,
  'lceil': 8968,
  'rceil': 8969,
  'lfloor': 8970,
  'rfloor': 8971,
  'lang': 9001,
  'rang': 9002,
  'loz': 9674,
  'spades': 9824,
  'clubs': 9827,
  'hearts': 9829,
  'diams': 9830
}

// Normalise sax.ENTITIES in place: numeric entries are replaced by the
// character for that code, string entries are kept as they are.
Object.keys(sax.ENTITIES).forEach(function (key) {
  var e = sax.ENTITIES[key]
  var s = typeof e === 'number' ? String.fromCharCode(e) : e
  sax.ENTITIES[key] = s
})

// Add the reverse mapping (state number -> state name) to sax.STATE.
for (var s in sax.STATE) {
  sax.STATE[sax.STATE[s]] = s
}

// shorthand
S = sax.STATE

// Calls the handler stored at parser[event] with `data`, if one is set.
// `event` is the full property name, e.g. 'ontext'.
function emit (parser, event, data) {
  parser[event] && parser[event](data)
}

// Emits a node event, first flushing any pending text node so the text event
// is delivered before this one.
function emitNode (parser, nodeType, data) {
  if (parser.textNode) closeText(parser)
  emit(parser, nodeType, data)
}

// Applies the trim/normalize options to the buffered text, emits 'ontext' if
// anything is left, and clears the text buffer.
function closeText (parser) {
  parser.textNode = textopts(parser.opt, parser.textNode)
  if (parser.textNode) emit(parser, 'ontext', parser.textNode)
  parser.textNode = ''
}

// Returns `text` after applying the text options:
//   opt.trim      - strip leading and trailing whitespace
//   opt.normalize - collapse every whitespace run to a single space
// Trimming is applied before normalising.
function textopts (opt, text) {
  if (opt.trim) text = text.trim()
  if (opt.normalize) text = text.replace(/\s+/g, ' ')
  return text
}

// Records and reports a parse error.
//
// er - error message. When position tracking is on, the current line, column
//      and character are appended to it.
//
// Pending text is flushed first. The resulting Error is stored on
// parser.error and passed to the 'onerror' handler. Returns the parser.
function error (parser, er) {
  closeText(parser)
  if (parser.trackPosition) {
    er += '\nLine: ' + parser.line +
      '\nColumn: ' + parser.column +
      '\nChar: ' + parser.c
  }
  er = new Error(er)
  parser.error = er
  emit(parser, 'onerror', er)
  return parser
}

// Finishes parsing. Reports an unclosed root (strict mode only) and an
// unexpected end if the parser stopped in the middle of a construct, flushes
// pending text, emits 'onend', then re-runs the SAXParser constructor on the
// same object so it is reset for reuse. Returns the parser.
function end (parser) {
  if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag')
  if ((parser.state !== S.BEGIN) &&
    (parser.state !== S.BEGIN_WHITESPACE) &&
    (parser.state !== S.TEXT)) {
    error(parser, 'Unexpected end')
  }
  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}

// Reports `message` through error() only when the parser is in strict mode;
// in non-strict mode it does nothing.
// Throws if the first argument is not a SAXParser instance.
function strictFail (parser, message) {
  if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
    throw new Error('bad call to strictFail')
  }
  if (parser.strict) {
    error(parser, message)
  }
}

// Starts a new tag from the accumulated parser.tagName: builds parser.tag with
// an empty attribute map, clears the pending attribute list and emits
// 'onopentagstart'. In non-strict mode the tag name is case-folded first.
function newTag (parser) {
  if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
  var parent = parser.tags[parser.tags.length - 1] || parser
  var tag = parser.tag = { name: parser.tagName, attributes: {} }

  // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
  if (parser.opt.xmlns) {
    tag.ns = parent.ns
  }
  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', tag)
}

// Splits a qualified name into { prefix, local }.
//
// name      - the raw name, e.g. 'svg:rect' or 'rect'.
// attribute - truthy when the name is an attribute name; in that case a bare
//             'xmlns' is reported as prefix 'xmlns' with an empty local part.
//
// A name without ':' gets an empty prefix. The name is split on every ':' and
// only the first two pieces are used.
function qname (name, attribute) {
  var i = name.indexOf(':')
  var qualName = i < 0 ? [ '', name ] : name.split(':')
  var prefix = qualName[0]
  var local = qualName[1]

  // <x "xmlns"="http://foo">
  if (attribute && name === 'xmlns') {
    prefix = 'xmlns'
    local = ''
  }

  return { prefix: prefix, local: local }
}

// Finalises the attribute held in parser.attribName / parser.attribValue.
//
// - In non-strict mode the name is case-folded first.
// - A repeated attribute name on the same tag is dropped (the first one wins).
// - With opt.xmlns, namespace declarations (xmlns / xmlns:prefix) are bound on
//   the tag's ns object and the attribute is queued on parser.attribList; the
//   'onattribute' event is emitted later by openTag().
// - Without opt.xmlns, the attribute is stored on the tag and 'onattribute' is
//   emitted immediately.
//
// The name/value buffers are cleared in every case.
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }

  var attribName = parser.attribName
  // attribList holds [name, value] pairs, so the name has to be compared
  // against each pair's first element
  var alreadyQueued = parser.attribList.some(function (pair) {
    return pair[0] === attribName
  })
  // go through Object.prototype: an attribute literally named
  // "hasOwnProperty" may already be stored on the attributes object
  if (alreadyQueued ||
    Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName)) {
    parser.attribName = parser.attribValue = ''
    return
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local

    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        // first binding on this tag: give it its own scope that inherits
        // from the parent's
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }

  parser.attribName = parser.attribValue = ''
}

// Completes the current open tag: resolves namespaces (when opt.xmlns is set),
// emits the deferred attribute events, pushes the tag on parser.tags and emits
// 'onopentag'.
//
// selfClosing - truthy for <tag/>. For a self-closing tag, parser.tag,
//               parser.tagName and parser.state are left untouched here.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    // emit namespace binding events
    var tag = parser.tag

    // add namespace info to tag
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    tag.uri = tag.ns[qn.prefix] || ''

    if (tag.prefix && !tag.uri) {
      strictFail(parser, 'Unbound namespace prefix: ' +
        JSON.stringify(parser.tagName))
      // fall back to the prefix itself as the uri
      tag.uri = qn.prefix
    }

    var parent = parser.tags[parser.tags.length - 1] || parser
    // the tag only has its own ns object when it declared bindings
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode(parser, 'onopennamespace', {
          prefix: p,
          uri: tag.ns[p]
        })
      })
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    //   http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var i = 0, l = parser.attribList.length; i < l; i++) {
      var nv = parser.attribList[i]
      var name = nv[0]
      var value = nv[1]
      var qualName = qname(name, true)
      var prefix = qualName.prefix
      var local = qualName.local
      var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
      var a = {
        name: name,
        value: value,
        prefix: prefix,
        local: local,
        uri: uri
      }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (prefix && prefix !== 'xmlns' && !uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, 'onattribute', a)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = !!selfClosing

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)
  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ''
  }
  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}

// Handles a complete close tag whose name is in parser.tagName.
//
// - An empty name is kept as literal text ('</>').
// - While script content is buffered, any close tag other than the script
//   tag is appended to the script text.
// - Otherwise the open-tag stack is searched from the top for the name.
//   Every tag above the match is closed too (each mismatch is a strict-mode
//   failure); if there is no match the close tag is kept as literal text.
// - For each tag closed, 'onclosetag' is emitted, followed (with opt.xmlns)
//   by 'onclosenamespace' for each binding the tag introduced.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    // compare case-insensitively, matching how openTag() detects <script>
    if (parser.tagName.toLowerCase() !== 'script') {
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }

  // didn't find it.  we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName
  var s = parser.tags.length
  // pop and close everything from the top of the stack down to the match
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}

// Resolve the entity name currently buffered in parser.entity (the text
// between `&` and `;`) to its replacement string.
//
// Lookup order:
//   1. parser.ENTITIES[name], exact case
//   2. parser.ENTITIES[name.toLowerCase()]
//   3. numeric character reference: `#NNN` (decimal) or `#xHHH` (hex)
//
// Only string values are accepted from the entity table, so members reached
// through the prototype chain (e.g. `&constructor;`) are not treated as
// entities.
//
// A numeric reference is valid only if it round-trips through parseInt
// (ignoring leading zeros) and lies in 0..0x10FFFF; String.fromCodePoint
// throws a RangeError outside that range, so those references are rejected
// here rather than allowed to abort parsing.
//
// Returns the replacement text, or the original `&name;` text after calling
// strictFail when the entity cannot be resolved.
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''

  var named = parser.ENTITIES[entity]
  if (named && typeof named === 'string') {
    return named
  }
  named = parser.ENTITIES[entityLC]
  if (named && typeof named === 'string') {
    return named
  }

  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed in the source but absent from numStr
  entity = entity.replace(/^0+/, '')
  // global isNaN on purpose: num is undefined for non-numeric entities
  if (isNaN(num) || numStr.toLowerCase() !== entity ||
      num < 0 || num > 0x10FFFF) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }

  return String.fromCodePoint(num)
}
|
950
|
|
|
|
|
951
|
|
|
// Consume one character seen before any markup has been read.
//   `<`         -> start of a tag; remember where it began
//   whitespace  -> ignored
//   other       -> reported via strictFail, then becomes the start of a
//                  text node
function beginWhiteSpace (parser, c) {
  if (c === '<') {
    parser.startTagPosition = parser.position
    parser.state = S.OPEN_WAKA
    return
  }

  if (isWhitespace(c)) {
    return
  }

  // have to process this as a text node.
  // weird, but happens.
  strictFail(parser, 'Non-whitespace before first tag.')
  parser.textNode = c
  parser.state = S.TEXT
}
|
963
|
|
|
|
|
964
|
|
|
// Return the character of `chunk` at index `i`, or the empty string once
// `i` has reached the end of the chunk.
function charAt (chunk, i) {
  return i < chunk.length ? chunk.charAt(i) : ''
}
|
971
|
|
|
|
|
972
|
|
|
// Feed a chunk of input to the parser (invoked with the parser as `this`).
//
// chunk: a string; `null` ends the document via end(parser); any other
//        object is converted with toString() first.
//
// The chunk is consumed one character at a time and dispatched on
// parser.state. Each case updates the parser's buffers and state and emits
// events through emitNode.
//
// Returns the parser, or the result of error()/end() for the early exits.
// Throws parser.error if a previous error is still set, and throws an Error
// if parser.state is not a known state.
function write (chunk) {
  var parser = this
  if (this.error) {
    throw this.error
  }
  if (parser.closed) {
    return error(parser,
      'Cannot write after close. Assign an onready handler.')
  }
  if (chunk === null) {
    return end(parser)
  }
  if (typeof chunk === 'object') {
    chunk = chunk.toString()
  }
  var i = 0
  var c = ''
  while (true) {
    c = charAt(chunk, i++)
    parser.c = c

    // charAt yields '' past the end of the chunk
    if (!c) {
      break
    }

    if (parser.trackPosition) {
      parser.position++
      if (c === '\n') {
        parser.line++
        parser.column = 0
      } else {
        parser.column++
      }
    }

    switch (parser.state) {
      case S.BEGIN:
        parser.state = S.BEGIN_WHITESPACE
        // skip a leading byte-order mark
        if (c === '\uFEFF') {
          continue
        }
        beginWhiteSpace(parser, c)
        continue

      case S.BEGIN_WHITESPACE:
        beginWhiteSpace(parser, c)
        continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // inside the root element: scan ahead to the next '<' or '&'
          // and append the whole run to the text node in one go
          var starti = i - 1
          while (c && c !== '<' && c !== '&') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.textNode += chunk.substring(starti, i - 1)
        }
        if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
          parser.state = S.OPEN_WAKA
          parser.startTagPosition = parser.position
        } else {
          if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
            strictFail(parser, 'Text data outside of root node.')
          }
          if (c === '&') {
            parser.state = S.TEXT_ENTITY
          } else {
            parser.textNode += c
          }
        }
        continue

      case S.SCRIPT:
        // only non-strict
        if (c === '<') {
          parser.state = S.SCRIPT_ENDING
        } else {
          parser.script += c
        }
        continue

      case S.SCRIPT_ENDING:
        if (c === '/') {
          parser.state = S.CLOSE_TAG
        } else {
          parser.script += '<' + c
          parser.state = S.SCRIPT
        }
        continue

      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === '!') {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ''
        } else if (isWhitespace(c)) {
          // wait for it...
        } else if (isMatch(nameStart, c)) {
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === '/') {
          parser.state = S.CLOSE_TAG
          parser.tagName = ''
        } else if (c === '?') {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ''
        } else {
          strictFail(parser, 'Unencoded <')
          // if there was some whitespace, then add that in.
          if (parser.startTagPosition + 1 < parser.position) {
            var pad = parser.position - parser.startTagPosition
            c = new Array(pad).join(' ') + c
          }
          parser.textNode += '<' + c
          parser.state = S.TEXT
        }
        continue

      case S.SGML_DECL:
        if (parser.sgmlDecl + c === '--') {
          parser.state = S.COMMENT
          parser.comment = ''
          parser.sgmlDecl = ''
          continue
        }

        if (parser.doctype && parser.doctype !== true && parser.sgmlDecl) {
          // a declaration inside a doctype's internal subset is kept as
          // part of the doctype text
          parser.state = S.DOCTYPE_DTD
          parser.doctype += '<!' + parser.sgmlDecl + c
          parser.sgmlDecl = ''
        } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
          emitNode(parser, 'onopencdata')
          parser.state = S.CDATA
          parser.sgmlDecl = ''
          parser.cdata = ''
        } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) {
            strictFail(parser,
              'Inappropriately located doctype declaration')
          }
          parser.doctype = ''
          parser.sgmlDecl = ''
        } else if (c === '>') {
          emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
          parser.sgmlDecl = ''
          parser.state = S.TEXT
        } else if (isQuote(c)) {
          parser.state = S.SGML_DECL_QUOTED
          // remember the opening quote so S.SGML_DECL_QUOTED can find the
          // matching one; without this the quoted state never ends
          parser.q = c
          parser.sgmlDecl += c
        } else {
          parser.sgmlDecl += c
        }
        continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ''
        }
        parser.sgmlDecl += c
        continue

      case S.DOCTYPE:
        if (c === '>') {
          parser.state = S.TEXT
          emitNode(parser, 'ondoctype', parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === '[') {
            parser.state = S.DOCTYPE_DTD
          } else if (isQuote(c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
        continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ''
          parser.state = S.DOCTYPE
        }
        continue

      case S.DOCTYPE_DTD:
        if (c === ']') {
          parser.doctype += c
          parser.state = S.DOCTYPE
        } else if (c === '<') {
          parser.state = S.OPEN_WAKA
          parser.startTagPosition = parser.position
        } else if (isQuote(c)) {
          parser.doctype += c
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        } else {
          parser.doctype += c
        }
        continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ''
        }
        continue

      case S.COMMENT:
        if (c === '-') {
          parser.state = S.COMMENT_ENDING
        } else {
          parser.comment += c
        }
        continue

      case S.COMMENT_ENDING:
        if (c === '-') {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) {
            emitNode(parser, 'oncomment', parser.comment)
          }
          parser.comment = ''
        } else {
          parser.comment += '-' + c
          parser.state = S.COMMENT
        }
        continue

      case S.COMMENT_ENDED:
        if (c !== '>') {
          strictFail(parser, 'Malformed comment')
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += '--' + c
          parser.state = S.COMMENT
        } else if (parser.doctype && parser.doctype !== true) {
          // comment was inside a doctype's internal subset
          parser.state = S.DOCTYPE_DTD
        } else {
          parser.state = S.TEXT
        }
        continue

      case S.CDATA:
        if (c === ']') {
          parser.state = S.CDATA_ENDING
        } else {
          parser.cdata += c
        }
        continue

      case S.CDATA_ENDING:
        if (c === ']') {
          parser.state = S.CDATA_ENDING_2
        } else {
          parser.cdata += ']' + c
          parser.state = S.CDATA
        }
        continue

      case S.CDATA_ENDING_2:
        if (c === '>') {
          if (parser.cdata) {
            emitNode(parser, 'oncdata', parser.cdata)
          }
          emitNode(parser, 'onclosecdata')
          parser.cdata = ''
          parser.state = S.TEXT
        } else if (c === ']') {
          // "]]]": the first ']' is data, the last two may still close
          parser.cdata += ']'
        } else {
          parser.cdata += ']]' + c
          parser.state = S.CDATA
        }
        continue

      case S.PROC_INST:
        if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else if (isWhitespace(c)) {
          parser.state = S.PROC_INST_BODY
        } else {
          parser.procInstName += c
        }
        continue

      case S.PROC_INST_BODY:
        if (!parser.procInstBody && isWhitespace(c)) {
          // skip whitespace between the name and the body
          continue
        } else if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else {
          parser.procInstBody += c
        }
        continue

      case S.PROC_INST_ENDING:
        if (c === '>') {
          emitNode(parser, 'onprocessinginstruction', {
            name: parser.procInstName,
            body: parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ''
          parser.state = S.TEXT
        } else {
          parser.procInstBody += '?' + c
          parser.state = S.PROC_INST_BODY
        }
        continue

      case S.OPEN_TAG:
        if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else {
          newTag(parser)
          if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid character in tag name')
            }
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.OPEN_TAG_SLASH:
        if (c === '>') {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, 'Forward-slash in opening tag not followed by >')
          parser.state = S.ATTRIB
        }
        continue

      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (isWhitespace(c)) {
          continue
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (c === '>') {
          strictFail(parser, 'Attribute without value')
          // valueless attribute: use the name as the value
          parser.attribValue = parser.attribName
          attrib(parser)
          openTag(parser)
        } else if (isWhitespace(c)) {
          parser.state = S.ATTRIB_NAME_SAW_WHITE
        } else if (isMatch(nameBody, c)) {
          parser.attribName += c
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (isWhitespace(c)) {
          continue
        } else {
          strictFail(parser, 'Attribute without value')
          parser.tag.attributes[parser.attribName] = ''
          parser.attribValue = ''
          emitNode(parser, 'onattribute', {
            name: parser.attribName,
            value: ''
          })
          parser.attribName = ''
          if (c === '>') {
            openTag(parser)
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.ATTRIB_VALUE:
        if (isWhitespace(c)) {
          continue
        } else if (isQuote(c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          if (!parser.opt.unquotedAttributeValues) {
            error(parser, 'Unquoted attribute value')
          }
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
        continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_Q
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        parser.q = ''
        parser.state = S.ATTRIB_VALUE_CLOSED
        continue

      case S.ATTRIB_VALUE_CLOSED:
        if (isWhitespace(c)) {
          parser.state = S.ATTRIB
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          strictFail(parser, 'No whitespace between attributes')
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (!isAttribEnd(c)) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_U
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        if (c === '>') {
          openTag(parser)
        } else {
          parser.state = S.ATTRIB
        }
        continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (isWhitespace(c)) {
            continue
          } else if (notMatch(nameStart, c)) {
            if (parser.script) {
              // "</" followed by a non-name char is just script text
              parser.script += '</' + c
              parser.state = S.SCRIPT
            } else {
              strictFail(parser, 'Invalid tagname in closing tag.')
            }
          } else {
            parser.tagName = c
          }
        } else if (c === '>') {
          closeTag(parser)
        } else if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else if (parser.script) {
          parser.script += '</' + parser.tagName
          parser.tagName = ''
          parser.state = S.SCRIPT
        } else {
          if (!isWhitespace(c)) {
            strictFail(parser, 'Invalid tagname in closing tag')
          }
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
        continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (isWhitespace(c)) {
          continue
        }
        if (c === '>') {
          closeTag(parser)
        } else {
          strictFail(parser, 'Invalid characters in closing tag')
        }
        continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // the three entity states differ only in where the result goes
        // and which state to resume afterwards
        var returnState
        var buffer
        switch (parser.state) {
          case S.TEXT_ENTITY:
            returnState = S.TEXT
            buffer = 'textNode'
            break

          case S.ATTRIB_VALUE_ENTITY_Q:
            returnState = S.ATTRIB_VALUE_QUOTED
            buffer = 'attribValue'
            break

          case S.ATTRIB_VALUE_ENTITY_U:
            returnState = S.ATTRIB_VALUE_UNQUOTED
            buffer = 'attribValue'
            break
        }

        if (c === ';') {
          var parsedEntity = parseEntity(parser)
          if (parser.opt.unparsedEntities && !Object.values(sax.XML_ENTITIES).includes(parsedEntity)) {
            // re-parse the replacement text as input instead of
            // appending it literally
            parser.entity = ''
            parser.state = returnState
            parser.write(parsedEntity)
          } else {
            parser[buffer] += parsedEntity
            parser.entity = ''
            parser.state = returnState
          }
        } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
          parser.entity += c
        } else {
          strictFail(parser, 'Invalid character in entity name')
          parser[buffer] += '&' + parser.entity + c
          parser.entity = ''
          parser.state = returnState
        }

        continue

      default: /* istanbul ignore next */ {
        // Error takes the message as its first argument
        throw new Error('Unknown state: ' + parser.state)
      }
    }
  } // while

  if (parser.position >= parser.bufferCheckPosition) {
    checkBufferLength(parser)
  }
  return parser
}
|
1541
|
|
|
|
|
1542
|
|
|
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */ |
|
1543
|
|
|
/* istanbul ignore next */ |
|
1544
|
|
|
if (!String.fromCodePoint) {
  // Install String.fromCodePoint on engines that lack it. Accepts any number
  // of code points and returns the corresponding UTF-16 string; throws a
  // RangeError for anything that is not an integer in 0..0x10FFFF.
  (function () {
    var stringFromCharCode = String.fromCharCode
    var floor = Math.floor
    var fromCodePoint = function () {
      // flush to the result string in batches of this many code units
      var BATCH_LIMIT = 0x4000
      var total = arguments.length
      if (!total) {
        return ''
      }
      var pending = []
      var output = ''
      for (var pos = 0; pos < total; pos++) {
        var codePoint = Number(arguments[pos])
        var isValid = isFinite(codePoint) && // rejects NaN and +/-Infinity
          codePoint >= 0 &&
          codePoint <= 0x10FFFF &&
          floor(codePoint) === codePoint // must be an integer
        if (!isValid) {
          throw RangeError('Invalid code point: ' + codePoint)
        }
        if (codePoint > 0xFFFF) {
          // Astral code point; split in surrogate halves
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          var offset = codePoint - 0x10000
          pending.push((offset >> 10) + 0xD800, (offset % 0x400) + 0xDC00)
        } else {
          // BMP code point
          pending.push(codePoint)
        }
        if (pos + 1 === total || pending.length > BATCH_LIMIT) {
          output += stringFromCharCode.apply(null, pending)
          pending.length = 0
        }
      }
      return output
    }
    /* istanbul ignore next */
    if (Object.defineProperty) {
      Object.defineProperty(String, 'fromCodePoint', {
        value: fromCodePoint,
        configurable: true,
        writable: true
      })
    } else {
      String.fromCodePoint = fromCodePoint
    }
  }())
}
|
1597
|
|
|
})(typeof exports === 'undefined' ? this.sax = {} : exports) |
|
1598
|
|
|
|