Passed
Push — main ( 10dbfc...61a674 )
by LCS
05:39 queued 03:09
created

node_modules/sax/lib/sax.js   F

Complexity

Total Complexity 236
Complexity/F 4.45

Size

Lines of Code 1597
Function Count 53

Duplication

Duplicated Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1211
dl 0
loc 1597
rs 0.8
c 0
b 0
f 0
wmc 236
mnd 183
bc 183
fnc 53
bpm 3.4528
cpm 4.4528
noi 22

How to fix   Complexity   

Complexity

Complex classes like node_modules/sax/lib/sax.js often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
;(function (sax) { // wrapper for non-node envs
2
  // Public entry points attached to the `sax` object passed into this wrapper.
  // sax.parser(strict, opt) is a factory that returns a new SAXParser.
  sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
3
  // Constructors are exposed directly as well.
  sax.SAXParser = SAXParser
4
  sax.SAXStream = SAXStream
5
  // sax.createStream(strict, opt) returns a new SAXStream.
  sax.createStream = createStream
6
7
  // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
8
  // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
9
  // since that's the earliest that a buffer overrun could occur.  This way, checks are
10
  // as rare as required, but as often as necessary to ensure never crossing this bound.
11
  // Furthermore, buffers are only tested at most once per write(), so passing a very
12
  // large string into write() might have undesirable effects, but this is manageable by
13
  // the caller, so it is assumed to be safe.  Thus, a call to write() may, in the extreme
14
  // edge case, result in creating at most one complete copy of the string passed in.
15
  // Set to Infinity to have unlimited buffers.
16
  sax.MAX_BUFFER_LENGTH = 64 * 1024
17
18
  // Names of the string accumulator properties kept on every parser.
  // clearBuffers() resets each of them to '' and checkBufferLength() measures
  // each of them against sax.MAX_BUFFER_LENGTH.
  var buffers = [
    'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
    'procInstName', 'procInstBody', 'entity', 'attribName',
    'attribValue', 'cdata', 'script'
  ]
23
24
  // Event names. A handler for event `name` is stored under the property
  // 'on' + name (for example 'onready', 'ontext').
  sax.EVENTS = [
    'text',
    'processinginstruction',
    'sgmldeclaration',
    'doctype',
    'comment',
    'opentagstart',
    'attribute',
    'opentag',
    'closetag',
    'opencdata',
    'cdata',
    'closecdata',
    'error',
    'end',
    'ready',
    'script',
    'opennamespace',
    'closenamespace'
  ]
44
45
  // Parser constructor; also used to reset an existing parser in place.
  //   strict - truthy for strict mode
  //   opt    - optional settings object; it is stored as parser.opt and is
  //            written to (lowercase, unquotedAttributeValues)
  // Works with or without `new`. Emits 'onready' once initialised.
  function SAXParser (strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var self = this
    var options = opt || {}

    clearBuffers(self)
    self.q = self.c = ''
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    self.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    if (options.lowercase) {
      self.looseCase = 'toLowerCase'
    } else {
      self.looseCase = 'toUpperCase'
    }

    self.tags = []
    self.sawRoot = false
    self.closedRoot = false
    self.closed = false
    self.error = null
    self.tag = null
    self.strict = !!strict
    self.noscript = !!(strict || options.noscript)
    self.state = S.BEGIN
    self.strictEntities = options.strictEntities
    // the per-parser entity table inherits from the shared table
    self.ENTITIES = Object.create(
      self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
    )
    self.attribList = []

    // namespaces form a prototype chain: the parser's own `ns` object
    // inherits from the root bindings.
    if (options.xmlns) {
      self.ns = Object.create(rootNS)
    }

    // unless configured explicitly, unquoted attribute values are
    // allowed only outside strict mode
    if (options.unquotedAttributeValues === undefined) {
      options.unquotedAttributeValues = !strict
    }

    // position bookkeeping, mostly for error reporting
    self.trackPosition = options.position !== false
    if (self.trackPosition) {
      self.position = self.line = self.column = 0
    }

    emit(self, 'onready')
  }
87
88
  // Fallback for engines without Object.create: returns a new object whose
  // prototype is the given object. Only the single-argument form is provided.
  if (!Object.create) {
    Object.create = function (proto) {
      var Ctor = function () {}
      Ctor.prototype = proto
      return new Ctor()
    }
  }
96
97
  // Fallback for engines without Object.keys: collects the object's own
  // enumerable property names.
  if (!Object.keys) {
    Object.keys = function (obj) {
      var names = []
      for (var key in obj) {
        if (!obj.hasOwnProperty(key)) continue
        names.push(key)
      }
      return names
    }
  }
104
105
  // Checks every parser buffer against the configured maximum
  // (sax.MAX_BUFFER_LENGTH, but never less than 10).
  //   - textNode, cdata and script are flushed to their handlers when too big
  //   - any other oversized buffer is reported through error()
  // Afterwards parser.bufferCheckPosition is set to the position at which the
  // next check is due.
  function checkBufferLength (parser) {
    var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var maxActual = 0
    for (var i = 0, l = buffers.length; i < l; i++) {
      var len = parser[buffers[i]].length
      if (len > maxAllowed) {
        // Text/cdata nodes can get big, and since they're buffered,
        // we can get here under normal conditions.
        // Avoid issues by emitting the text node now,
        // so at least it won't get any bigger.
        switch (buffers[i]) {
          case 'textNode':
            closeText(parser)
            break

          case 'cdata':
            emitNode(parser, 'oncdata', parser.cdata)
            parser.cdata = ''
            break

          case 'script':
            emitNode(parser, 'onscript', parser.script)
            parser.script = ''
            break

          default:
            error(parser, 'Max buffer length exceeded: ' + buffers[i])
        }
      }
      // uses the length measured before any flush above
      maxActual = Math.max(maxActual, len)
    }
    // schedule the next check for the earliest possible buffer overrun.
    var m = sax.MAX_BUFFER_LENGTH - maxActual
    parser.bufferCheckPosition = m + parser.position
  }
140
141
  // Resets every buffer property named in `buffers` to the empty string.
  function clearBuffers (parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
146
147
  // Pushes out whatever is buffered: closes the pending text node, then emits
  // and clears the cdata and script buffers if they are non-empty.
  function flushBuffers (parser) {
    closeText(parser)

    var flushable = ['cdata', 'script']
    flushable.forEach(function (name) {
      if (parser[name] === '') return
      emitNode(parser, 'on' + name, parser[name])
      parser[name] = ''
    })
  }
158
159
  // Instance API of SAXParser.
  //   end()    - finishes parsing via end(parser); returns nothing
  //   write    - the shared write(chunk) function
  //   resume() - clears parser.error and returns the parser
  //   close()  - same as write(null)
  //   flush()  - emits any buffered text/cdata/script via flushBuffers
  SAXParser.prototype = {
    end: function () { end(this) },
    write: write,
    resume: function () { this.error = null; return this },
    close: function () { return this.write(null) },
    flush: function () { flushBuffers(this) }
  }
166
167
  // Base class for SAXStream: the `stream` module's Stream when it can be
  // required, otherwise an empty constructor.
  var Stream = (function () {
    var noop = function () {}
    try {
      return require('stream').Stream || noop
    } catch (ex) {
      return noop
    }
  })()
174
175
  // Every event name except 'error' and 'end'.
  var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === 'error' || name === 'end')
  })
178
179
  // Factory: builds a SAXStream with the given strictness and options.
  function createStream (strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
182
183
  // Stream wrapper around a SAXParser.
  //   strict, opt - passed through to the SAXParser constructor
  // Works with or without `new`. The wrapped parser is kept in this._parser.
  // Parser 'end' and 'error' are re-emitted as stream events. For every name
  // in streamWraps an 'on<name>' accessor is defined on the instance.
  function SAXStream (strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }

    Stream.apply(this)

    this._parser = new SAXParser(strict, opt)
    this.writable = true
    this.readable = true

    var me = this

    this._parser.onend = function () {
      me.emit('end')
    }

    this._parser.onerror = function (er) {
      me.emit('error', er)

      // if didn't throw, then means error was handled.
      // go ahead and clear error, so we can write again.
      me._parser.error = null
    }

    // created on demand by write() the first time a Buffer arrives
    this._decoder = null

    streamWraps.forEach(function (ev) {
      Object.defineProperty(me, 'on' + ev, {
        // reading stream.on<ev> returns the parser's current handler
        get: function () {
          return me._parser['on' + ev]
        },
        // a falsy value removes all listeners for the event and stores the
        // value on the parser; anything else is registered via on()
        set: function (h) {
          if (!h) {
            me.removeAllListeners(ev)
            me._parser['on' + ev] = h
            return h
          }
          me.on(ev, h)
        },
        enumerable: true,
        configurable: false
      })
    })
  }
228
229
  // SAXStream inherits from Stream; `constructor` is restored as a
  // non-enumerable property pointing back at SAXStream.
  SAXStream.prototype = Object.create(Stream.prototype, {
    constructor: {
      value: SAXStream
    }
  })
234
235
  // Feeds a chunk to the wrapped parser.
  //   data - a string, a Buffer, or anything with toString()
  // Buffers are decoded as UTF-8 through a StringDecoder that is created
  // lazily and reused across calls. After parsing, the (decoded) chunk is
  // re-emitted as a 'data' event.
  // Always returns true.
  SAXStream.prototype.write = function (data) {
    if (typeof Buffer === 'function' &&
      typeof Buffer.isBuffer === 'function' &&
      Buffer.isBuffer(data)) {
      if (!this._decoder) {
        var SD = require('string_decoder').StringDecoder
        this._decoder = new SD('utf8')
      }
      data = this._decoder.write(data)
    }

    this._parser.write(data.toString())
    this.emit('data', data)
    return true
  }
250
251
  // Writes an optional final non-empty chunk, then ends the wrapped parser.
  // Always returns true.
  SAXStream.prototype.end = function (chunk) {
    var hasFinalChunk = chunk && chunk.length
    if (hasFinalChunk) this.write(chunk)
    this._parser.end()
    return true
  }
258
259
  // Registers a listener through Stream.prototype.on. For a wrapped event
  // whose parser handler is not set yet, a forwarding handler is installed on
  // the parser first; it re-emits the parser callback as a stream event with
  // the same arguments.
  SAXStream.prototype.on = function (ev, handler) {
    var me = this
    var slot = 'on' + ev
    var isWrapped = streamWraps.indexOf(ev) !== -1

    if (!me._parser[slot] && isWrapped) {
      me._parser[slot] = function () {
        var args = [ev]
        for (var k = 0; k < arguments.length; k++) {
          args.push(arguments[k])
        }
        me.emit.apply(me, args)
      }
    }

    return Stream.prototype.on.call(me, ev, handler)
  }
271
272
  // this really needs to be replaced with character classes.
273
  // XML allows all manner of ridiculous numbers and digits.
274
  // Literal marker text for CDATA sections and DOCTYPE declarations
  // (presumably compared against accumulated input by write() — not visible here).
  var CDATA = '[CDATA['
275
  var DOCTYPE = 'DOCTYPE'
276
  // Namespace URIs that the reserved `xml` and `xmlns` prefixes must be bound to.
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
277
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
278
  // Root of the namespace prototype chain: a parser created with opt.xmlns
  // gets an `ns` object that inherits from this one.
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
279
280
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
281
  // This implementation works on strings, a single character at a time
282
  // as such, it cannot ever support astral-plane characters (10000-EFFFF)
283
  // without a significant breaking change to either this  parser, or the
284
  // JavaScript language.  Implementation of an emoji-capable xml parser
285
  // is left as an exercise for the reader.
286
  // Matches one character that may start a name.
  var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
287
288
  // Matches one character that may continue a name: everything in nameStart
  // plus \u00B7, \u0300-\u036F, \u203F-\u2040, '.', digits and '-'.
  var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
289
290
  // Same as nameStart / nameBody but additionally accepting '#', so numeric
  // character references such as #65 or #x41 are matched as well.
  var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
291
  var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
292
293
  // True when c is exactly a space, newline, carriage return or tab.
  function isWhitespace (c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
296
297
  // True when c is a double or a single quote character.
  function isQuote (c) {
    if (c === '"') return true
    return c === '\''
  }
300
301
  // True when c is '>' or whitespace.
  function isAttribEnd (c) {
    if (c === '>') return true
    return isWhitespace(c)
  }
304
305
  // Result of testing c against the given regular expression.
  function isMatch (regex, c) {
    var matched = regex.test(c)
    return matched
  }
308
309
  // Negation of isMatch: true when c does not satisfy the regular expression.
  function notMatch (regex, c) {
    return !regex.test(c)
  }
312
313
  // Parser states, numbered consecutively from 0 in the order listed.
  // S is used as the running counter while the table is built.
  var S = 0
  sax.STATE = {
    BEGIN: S++, // leading byte order mark or whitespace
    BEGIN_WHITESPACE: S++, // leading whitespace
    TEXT: S++, // general stuff
    TEXT_ENTITY: S++, // &amp and such.
    OPEN_WAKA: S++, // <
    SGML_DECL: S++, // <!BLARG
    SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
    DOCTYPE: S++, // <!DOCTYPE
    DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
    DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
    DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
    COMMENT_STARTING: S++, // <!-
    COMMENT: S++, // <!--
    COMMENT_ENDING: S++, // <!-- blah -
    COMMENT_ENDED: S++, // <!-- blah --
    CDATA: S++, // <![CDATA[ something
    CDATA_ENDING: S++, // ]
    CDATA_ENDING_2: S++, // ]]
    PROC_INST: S++, // <?hi
    PROC_INST_BODY: S++, // <?hi there
    PROC_INST_ENDING: S++, // <?hi "there" ?
    OPEN_TAG: S++, // <strong
    OPEN_TAG_SLASH: S++, // <strong /
    ATTRIB: S++, // <a
    ATTRIB_NAME: S++, // <a foo
    ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
    ATTRIB_VALUE: S++, // <a foo=
    ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
    ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
    ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
    ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
    ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
    CLOSE_TAG: S++, // </a
    CLOSE_TAG_SAW_WHITE: S++, // </a   >
    SCRIPT: S++, // <script> ...
    SCRIPT_ENDING: S++ // <script> ... <
  }
352
353
  // The five named entities used when the strictEntities option is set
  // (the parser's entity table then inherits from this object).
  sax.XML_ENTITIES = {
    'amp': '&',
    'gt': '>',
    'lt': '<',
    'quot': '"',
    'apos': "'"
  }
360
361
  sax.ENTITIES = {
362
    'amp': '&',
363
    'gt': '>',
364
    'lt': '<',
365
    'quot': '"',
366
    'apos': "'",
367
    'AElig': 198,
368
    'Aacute': 193,
369
    'Acirc': 194,
370
    'Agrave': 192,
371
    'Aring': 197,
372
    'Atilde': 195,
373
    'Auml': 196,
374
    'Ccedil': 199,
375
    'ETH': 208,
376
    'Eacute': 201,
377
    'Ecirc': 202,
378
    'Egrave': 200,
379
    'Euml': 203,
380
    'Iacute': 205,
381
    'Icirc': 206,
382
    'Igrave': 204,
383
    'Iuml': 207,
384
    'Ntilde': 209,
385
    'Oacute': 211,
386
    'Ocirc': 212,
387
    'Ograve': 210,
388
    'Oslash': 216,
389
    'Otilde': 213,
390
    'Ouml': 214,
391
    'THORN': 222,
392
    'Uacute': 218,
393
    'Ucirc': 219,
394
    'Ugrave': 217,
395
    'Uuml': 220,
396
    'Yacute': 221,
397
    'aacute': 225,
398
    'acirc': 226,
399
    'aelig': 230,
400
    'agrave': 224,
401
    'aring': 229,
402
    'atilde': 227,
403
    'auml': 228,
404
    'ccedil': 231,
405
    'eacute': 233,
406
    'ecirc': 234,
407
    'egrave': 232,
408
    'eth': 240,
409
    'euml': 235,
410
    'iacute': 237,
411
    'icirc': 238,
412
    'igrave': 236,
413
    'iuml': 239,
414
    'ntilde': 241,
415
    'oacute': 243,
416
    'ocirc': 244,
417
    'ograve': 242,
418
    'oslash': 248,
419
    'otilde': 245,
420
    'ouml': 246,
421
    'szlig': 223,
422
    'thorn': 254,
423
    'uacute': 250,
424
    'ucirc': 251,
425
    'ugrave': 249,
426
    'uuml': 252,
427
    'yacute': 253,
428
    'yuml': 255,
429
    'copy': 169,
430
    'reg': 174,
431
    'nbsp': 160,
432
    'iexcl': 161,
433
    'cent': 162,
434
    'pound': 163,
435
    'curren': 164,
436
    'yen': 165,
437
    'brvbar': 166,
438
    'sect': 167,
439
    'uml': 168,
440
    'ordf': 170,
441
    'laquo': 171,
442
    'not': 172,
443
    'shy': 173,
444
    'macr': 175,
445
    'deg': 176,
446
    'plusmn': 177,
447
    'sup1': 185,
448
    'sup2': 178,
449
    'sup3': 179,
450
    'acute': 180,
451
    'micro': 181,
452
    'para': 182,
453
    'middot': 183,
454
    'cedil': 184,
455
    'ordm': 186,
456
    'raquo': 187,
457
    'frac14': 188,
458
    'frac12': 189,
459
    'frac34': 190,
460
    'iquest': 191,
461
    'times': 215,
462
    'divide': 247,
463
    'OElig': 338,
464
    'oelig': 339,
465
    'Scaron': 352,
466
    'scaron': 353,
467
    'Yuml': 376,
468
    'fnof': 402,
469
    'circ': 710,
470
    'tilde': 732,
471
    'Alpha': 913,
472
    'Beta': 914,
473
    'Gamma': 915,
474
    'Delta': 916,
475
    'Epsilon': 917,
476
    'Zeta': 918,
477
    'Eta': 919,
478
    'Theta': 920,
479
    'Iota': 921,
480
    'Kappa': 922,
481
    'Lambda': 923,
482
    'Mu': 924,
483
    'Nu': 925,
484
    'Xi': 926,
485
    'Omicron': 927,
486
    'Pi': 928,
487
    'Rho': 929,
488
    'Sigma': 931,
489
    'Tau': 932,
490
    'Upsilon': 933,
491
    'Phi': 934,
492
    'Chi': 935,
493
    'Psi': 936,
494
    'Omega': 937,
495
    'alpha': 945,
496
    'beta': 946,
497
    'gamma': 947,
498
    'delta': 948,
499
    'epsilon': 949,
500
    'zeta': 950,
501
    'eta': 951,
502
    'theta': 952,
503
    'iota': 953,
504
    'kappa': 954,
505
    'lambda': 955,
506
    'mu': 956,
507
    'nu': 957,
508
    'xi': 958,
509
    'omicron': 959,
510
    'pi': 960,
511
    'rho': 961,
512
    'sigmaf': 962,
513
    'sigma': 963,
514
    'tau': 964,
515
    'upsilon': 965,
516
    'phi': 966,
517
    'chi': 967,
518
    'psi': 968,
519
    'omega': 969,
520
    'thetasym': 977,
521
    'upsih': 978,
522
    'piv': 982,
523
    'ensp': 8194,
524
    'emsp': 8195,
525
    'thinsp': 8201,
526
    'zwnj': 8204,
527
    'zwj': 8205,
528
    'lrm': 8206,
529
    'rlm': 8207,
530
    'ndash': 8211,
531
    'mdash': 8212,
532
    'lsquo': 8216,
533
    'rsquo': 8217,
534
    'sbquo': 8218,
535
    'ldquo': 8220,
536
    'rdquo': 8221,
537
    'bdquo': 8222,
538
    'dagger': 8224,
539
    'Dagger': 8225,
540
    'bull': 8226,
541
    'hellip': 8230,
542
    'permil': 8240,
543
    'prime': 8242,
544
    'Prime': 8243,
545
    'lsaquo': 8249,
546
    'rsaquo': 8250,
547
    'oline': 8254,
548
    'frasl': 8260,
549
    'euro': 8364,
550
    'image': 8465,
551
    'weierp': 8472,
552
    'real': 8476,
553
    'trade': 8482,
554
    'alefsym': 8501,
555
    'larr': 8592,
556
    'uarr': 8593,
557
    'rarr': 8594,
558
    'darr': 8595,
559
    'harr': 8596,
560
    'crarr': 8629,
561
    'lArr': 8656,
562
    'uArr': 8657,
563
    'rArr': 8658,
564
    'dArr': 8659,
565
    'hArr': 8660,
566
    'forall': 8704,
567
    'part': 8706,
568
    'exist': 8707,
569
    'empty': 8709,
570
    'nabla': 8711,
571
    'isin': 8712,
572
    'notin': 8713,
573
    'ni': 8715,
574
    'prod': 8719,
575
    'sum': 8721,
576
    'minus': 8722,
577
    'lowast': 8727,
578
    'radic': 8730,
579
    'prop': 8733,
580
    'infin': 8734,
581
    'ang': 8736,
582
    'and': 8743,
583
    'or': 8744,
584
    'cap': 8745,
585
    'cup': 8746,
586
    'int': 8747,
587
    'there4': 8756,
588
    'sim': 8764,
589
    'cong': 8773,
590
    'asymp': 8776,
591
    'ne': 8800,
592
    'equiv': 8801,
593
    'le': 8804,
594
    'ge': 8805,
595
    'sub': 8834,
596
    'sup': 8835,
597
    'nsub': 8836,
598
    'sube': 8838,
599
    'supe': 8839,
600
    'oplus': 8853,
601
    'otimes': 8855,
602
    'perp': 8869,
603
    'sdot': 8901,
604
    'lceil': 8968,
605
    'rceil': 8969,
606
    'lfloor': 8970,
607
    'rfloor': 8971,
608
    'lang': 9001,
609
    'rang': 9002,
610
    'loz': 9674,
611
    'spades': 9824,
612
    'clubs': 9827,
613
    'hearts': 9829,
614
    'diams': 9830
615
  }
616
617
  // Entities listed as numeric character codes are replaced by the
  // corresponding one-character strings; string entries stay as they are.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      value = String.fromCharCode(value)
    }
    sax.ENTITIES[name] = value
  })

  // Add the reverse mapping (state number -> state name) to the state table.
  for (var s in sax.STATE) {
    var stateNumber = sax.STATE[s]
    sax.STATE[stateNumber] = s
  }

  // From here on S is shorthand for the state table.
  S = sax.STATE
629
630
  // Calls parser[event](data) when a handler is set under that property;
  // does nothing otherwise.
  function emit (parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
633
634
  // Emits a node event, first flushing any pending text so that the text
  // event is delivered ahead of the node.
  function emitNode (parser, nodeType, data) {
    var hasPendingText = !!parser.textNode
    if (hasPendingText) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
638
639
  // Applies the text options to the buffered text, emits 'ontext' if anything
  // is left, and always leaves the text buffer empty.
  function closeText (parser) {
    var processed = textopts(parser.opt, parser.textNode)
    parser.textNode = processed
    if (processed) {
      emit(parser, 'ontext', processed)
    }
    parser.textNode = ''
  }
644
645
  // Returns text after applying the optional transformations:
  //   opt.trim      - strip leading/trailing whitespace
  //   opt.normalize - collapse every whitespace run into a single space
  // Trimming happens before normalising.
  function textopts (opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
650
651
  // Reports a parse error.
  //   er - the message text
  // Pending text is flushed first. When position tracking is on, the current
  // line, column and character are appended to the message. The message is
  // wrapped in an Error, stored as parser.error and passed to 'onerror'.
  // Returns the parser.
  function error (parser, er) {
    closeText(parser)
    if (parser.trackPosition) {
      er += '\nLine: ' + parser.line +
        '\nColumn: ' + parser.column +
        '\nChar: ' + parser.c
    }
    er = new Error(er)
    parser.error = er
    emit(parser, 'onerror', er)
    return parser
  }
663
664
  // Finishes a document.
  //   - strict-mode failure if a root tag was opened but never closed
  //   - error if the parser stopped in any state other than BEGIN,
  //     BEGIN_WHITESPACE or TEXT
  // Pending text is flushed, 'onend' is emitted, and the SAXParser
  // constructor is then re-run on the same object with the same strictness
  // and options. Returns the parser.
  function end (parser) {
    if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag')
    if ((parser.state !== S.BEGIN) &&
      (parser.state !== S.BEGIN_WHITESPACE) &&
      (parser.state !== S.TEXT)) {
      error(parser, 'Unexpected end')
    }
    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
678
679
  // Reports `message` through error(), but only when the parser is in strict
  // mode. Throws if the first argument is not a SAXParser instance.
  function strictFail (parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) return
    error(parser, message)
  }
687
688
  // Starts a new tag object from the buffered tag name and emits
  // 'onopentagstart' with it. Outside strict mode the name is case-folded
  // first. The list of deferred attributes is emptied.
  function newTag (parser) {
    if (!parser.strict) {
      var foldCase = parser.looseCase
      parser.tagName = parser.tagName[foldCase]()
    }

    var innermost = parser.tags[parser.tags.length - 1]
    var parent = innermost || parser

    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // start out sharing the parent's namespace object; it is replaced later
    // if the tag contains an xmlns="foo" or xmlns:foo="bar" attribute
    if (parser.opt.xmlns) {
      tag.ns = parent.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
700
701
  // Splits a qualified name into { prefix, local }.
  //   name      - e.g. 'svg:rect' or 'rect'
  //   attribute - truthy when the name belongs to an attribute
  // A name without a colon has an empty prefix. For attributes, the bare name
  // 'xmlns' is reported as prefix 'xmlns' with an empty local part.
  function qname (name, attribute) {
    var parts = name.indexOf(':') < 0 ? [ '', name ] : name.split(':')

    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }

    return { prefix: parts[0], local: parts[1] }
  }
715
716
  // Records the attribute currently buffered in parser.attribName /
  // parser.attribValue on the tag being opened, then clears both buffers.
  //   - outside strict mode the name is case-folded first
  //   - a name already seen on this tag is dropped (first occurrence wins),
  //     both for attributes already stored on the tag and for attributes
  //     still waiting in parser.attribList
  //   - with opt.xmlns, xmlns bindings are pushed into the tag's namespace
  //     object and the attribute is queued in attribList for openTag
  //   - without opt.xmlns, the attribute is stored and emitted right away
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var attribName = parser.attribName

    // attribList holds [name, value] pairs, so compare against the name slot
    var isQueued = parser.attribList.some(function (pair) {
      return pair[0] === attribName
    })
    // call through Object.prototype: an attribute literally named
    // "hasOwnProperty" would otherwise shadow the method
    var isStored = Object.prototype.hasOwnProperty.call(
      parser.tag.attributes, attribName)

    if (isQueued || isStored) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // give the tag its own namespace object the first time it binds
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
767
768
  // Completes the tag that is currently being opened.
  //   selfClosing - truthy for <tag/>
  // With opt.xmlns: resolves the tag's prefix/local/uri, emits
  // 'onopennamespace' for each binding the tag introduced, then emits the
  // deferred 'onattribute' events with namespace information attached.
  // In all modes: pushes the tag on the stack and emits 'onopentag'. For a
  // tag that is not self-closing the parser moves to SCRIPT (a <script> tag
  // when scripts are enabled) or TEXT and the current tag is cleared.
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      // emit namespace binding events
      var tag = parser.tag

      // add namespace info to tag
      var qn = qname(parser.tagName)
      tag.prefix = qn.prefix
      tag.local = qn.local
      tag.uri = tag.ns[qn.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(parser.tagName))
        // fall back to using the prefix itself as the uri
        tag.uri = qn.prefix
      }

      var parent = parser.tags[parser.tags.length - 1] || parser
      // a namespace object different from the parent's means this tag
      // introduced bindings of its own
      if (tag.ns && parent.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (p) {
          emitNode(parser, 'onopennamespace', {
            prefix: p,
            uri: tag.ns[p]
          })
        })
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      //   http://www.w3.org/TR/REC-xml-names/#defaulting
      for (var i = 0, l = parser.attribList.length; i < l; i++) {
        var nv = parser.attribList[i]
        var name = nv[0]
        var value = nv[1]
        var qualName = qname(name, true)
        var prefix = qualName.prefix
        var local = qualName.local
        var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
        var a = {
          name: name,
          value: value,
          prefix: prefix,
          local: local,
          uri: uri
        }

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (prefix && prefix !== 'xmlns' && !uri) {
          strictFail(parser, 'Unbound namespace prefix: ' +
            JSON.stringify(prefix))
          a.uri = prefix
        }
        parser.tag.attributes[name] = a
        emitNode(parser, 'onattribute', a)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = !!selfClosing

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)
    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
        parser.state = S.SCRIPT
      } else {
        parser.state = S.TEXT
      }
      parser.tag = null
      parser.tagName = ''
    }
    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
846
847
  // Handles a completed closing tag whose name is in parser.tagName.
  //   - an empty name is put back into the text as '</>'
  //   - while script content is buffered, any closing tag other than
  //     </script> (compared case-insensitively, matching how openTag enters
  //     script mode) is appended to the script text
  //   - otherwise the tag stack is searched from the top for the name; every
  //     tag above the match is closed too, each with an 'onclosetag' event
  //     and, with opt.xmlns, 'onclosenamespace' events for its own bindings
  //   - a name not found on the stack is put back into the text
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      if (parser.tagName.toLowerCase() !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it.  we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }
    parser.tagName = tagName
    var s = parser.tags.length
    // pop every tag down to and including the matched one at index t
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop()
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }
    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
917
918
  // Resolves the entity name buffered in parser.entity (without '&' and ';').
  // Lookup order: exact name in parser.ENTITIES, lower-cased name, then a
  // numeric character reference (#NNN decimal or #xHHH hexadecimal).
  // Returns the replacement text. Anything unrecognised, malformed, or
  // outside the code point range 0..0x10FFFF is returned unchanged as
  // '&name;' after a strict-mode failure.
  function parseEntity (parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    if (parser.ENTITIES[entity]) {
      return parser.ENTITIES[entity]
    }
    if (parser.ENTITIES[entityLC]) {
      return parser.ENTITIES[entityLC]
    }
    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }
    // leading zeros are allowed; the round trip through numStr rejects
    // anything parseInt only partially consumed
    entity = entity.replace(/^0+/, '')
    // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so such
    // references are treated as invalid instead of crashing the parser
    if (isNaN(num) || numStr.toLowerCase() !== entity ||
      num < 0 || num > 0x10FFFF) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
950
951
  // Handles one character seen before the first tag.
  //   '<'         - switch to OPEN_WAKA and remember where the tag starts
  //   whitespace  - ignored
  //   other       - strict-mode failure; the character starts a text node
  function beginWhiteSpace (parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    if (isWhitespace(c)) return

    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
963
964
  // Character of chunk at index i, or '' once i has reached the end.
  function charAt (chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
971
972
  // Feed one chunk of input through the parser's state machine.
  //
  // Called with the parser as `this`. Behaviour, in order:
  //   - if `this.error` is set, it is thrown;
  //   - if the parser is closed, the result of error(parser, ...) is returned;
  //   - a `null` chunk ends the parse (returns the result of end(parser));
  //   - any other object chunk is converted with its toString().
  // The chunk is then consumed one character at a time, dispatching on
  // `parser.state`. Each case finishes with `continue`, so exactly one state
  // handler runs per character. After the chunk is exhausted the buffer
  // length check is run if `parser.position` has reached
  // `parser.bufferCheckPosition`, and the parser is returned.
  function write (chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(parser,
        'Cannot write after close. Assign an onready handler.')
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt() returns '' past the end of the chunk.
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // Skip a leading byte-order mark.
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Fast path inside the root element: scan ahead to the next
            // '<' or '&' (or end of chunk) and append the whole run at once,
            // keeping the position counters in step.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
            continue
          }

          if (parser.doctype && parser.doctype !== true && parser.sgmlDecl) {
            // A doctype is still being collected: fold this declaration
            // back into the doctype text.
            parser.state = S.DOCTYPE_DTD
            parser.doctype += '<!' + parser.sgmlDecl + c
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(parser,
                'Inappropriately located doctype declaration')
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // Remember the opening quote; SGML_DECL_QUOTED compares against
            // parser.q to find the matching close quote.
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          if (c === ']') {
            parser.doctype += c
            parser.state = S.DOCTYPE
          } else if (c === '<') {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else if (isQuote(c)) {
            parser.doctype += c
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          } else {
            parser.doctype += c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else if (parser.doctype && parser.doctype !== true) {
            parser.state = S.DOCTYPE_DTD
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          } else {
            parser.cdata += c
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // A third ']' in a row: the earliest one is literal data.
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && isWhitespace(c)) {
            // Drop whitespace before the body starts.
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(parser, 'Forward-slash in opening tag not followed by >')
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: ''
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            if (!parser.opt.unquotedAttributeValues) {
              error(parser, 'Unquoted attribute value')
            }
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one handler; they differ only in
          // the state to resume and the buffer the result is appended to.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            var parsedEntity = parseEntity(parser)
            if (parser.opt.unparsedEntities && !Object.values(sax.XML_ENTITIES).includes(parsedEntity)) {
              // With opt.unparsedEntities, an expansion that is not one of
              // the sax.XML_ENTITIES values is fed back through write()
              // instead of being appended verbatim.
              parser.entity = ''
              parser.state = returnState
              parser.write(parsedEntity)
            } else {
              parser[buffer] += parsedEntity
              parser.entity = ''
              parser.state = returnState
            }
          } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }

          continue

        default: /* istanbul ignore next */ {
          // Error's first argument is the message, so pass the text alone.
          throw new Error('Unknown state: ' + parser.state)
        }
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
1541
1542
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
1543
  /* istanbul ignore next */
1544
  if (!String.fromCodePoint) {
    (function () {
      var toChars = String.fromCharCode
      var floor = Math.floor
      // Converts every argument to a number, validates it as a Unicode code
      // point, and returns the string built from the resulting UTF-16 code
      // units. Throws RangeError on the first invalid code point.
      var fromCodePoint = function () {
        // Pending code units are flushed once the batch grows past this
        // size, bounding the argument list handed to fromCharCode.apply.
        var FLUSH_THRESHOLD = 0x4000
        var pending = []
        var count = arguments.length
        if (count === 0) {
          return ''
        }
        var output = ''
        for (var k = 0; k < count; k++) {
          var cp = Number(arguments[k])
          var isValid =
            isFinite(cp) && // rejects `NaN`, `+Infinity`, and `-Infinity`
            cp >= 0 &&
            cp <= 0x10FFFF && // Unicode code point range
            floor(cp) === cp // must be an integer
          if (!isValid) {
            throw RangeError('Invalid code point: ' + cp)
          }
          if (cp > 0xFFFF) {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            var offset = cp - 0x10000
            var lead = (offset >> 10) + 0xD800
            var trail = (offset % 0x400) + 0xDC00
            pending.push(lead, trail)
          } else {
            // BMP code point
            pending.push(cp)
          }
          var isLast = k === count - 1
          if (isLast || pending.length > FLUSH_THRESHOLD) {
            output += toChars.apply(null, pending)
            pending.length = 0
          }
        }
        return output
      }
      /* istanbul ignore next */
      if (Object.defineProperty) {
        var descriptor = {
          value: fromCodePoint,
          configurable: true,
          writable: true
        }
        Object.defineProperty(String, 'fromCodePoint', descriptor)
      } else {
        String.fromCodePoint = fromCodePoint
      }
    }())
  }
1597
})(typeof exports === 'undefined' ? this.sax = {} : exports)
1598