Passed
Push — main ( 10dbfc...61a674 )
by LCS
05:39 queued 03:09
created

node_modules/iconv-lite/encodings/dbcs-codec.js   F

Complexity

Total Complexity 104
Complexity/F 7.43

Size

Lines of Code 554
Function Count 14

Duplication

Duplicated Lines 11
Ratio 1.99 %

Importance

Changes 0
Metric Value
eloc 322
dl 11
loc 554
rs 2
c 0
b 0
f 0
wmc 104
mnd 90
bc 90
fnc 14
bpm 6.4285
cpm 7.4285
noi 60

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complexity

Tip: Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like node_modules/iconv-lite/encodings/dbcs-codec.js often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefix or suffix.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"use strict";
2
var Buffer = require("safer-buffer").Buffer;
3
4
// Multibyte codec. In this scheme, a character is represented by 1 or more bytes.
5
// Our codec supports UTF-16 surrogates, extensions for GB18030 and unicode sequences.
6
// To save memory and loading time, we read table files only when requested.
7
8
exports._dbcs = DBCSCodec;
9
10
// Sentinel values. Table slots hold either a non-negative code or one of these negative markers.
var UNASSIGNED = -1,                    // Slot has no mapping.
    GB18030_CODE = -2,                  // Slot ends a GB18030 4-byte sequence.
    SEQ_START  = -10,                   // Values <= SEQ_START index a sequence table (index = SEQ_START - value).
    NODE_START = -1000,                 // Values <= NODE_START index the next trie node (index = NODE_START - value).
    UNASSIGNED_NODE = new Array(0x100), // Template of 256 slots; copied with slice(0) for new nodes and buckets.
    DEF_CHAR = -1;                      // Key inside encodeTableSeq nodes meaning "the sequence ends here".

// Fill the template so that each of the 256 slots starts out unassigned.
for (var i = 0; i < 0x100; i++)
    UNASSIGNED_NODE[i] = UNASSIGNED;
19
20
21
/**
 * Class DBCSCodec reads and initializes mapping tables.
 *
 * @param {Object} codecOptions - Codec description. Fields read here: `encodingName`, `table` (function
 *     returning the mapping chunks) and the optional `encodeSkipVals`, `encodeAdd` and `gb18030`
 *     (function returning the GB18030 ranges).
 * @param {Object} iconv - Supplies `defaultCharUnicode` and `defaultCharSingleByte`.
 * @throws {Error} When `codecOptions` is missing or has no `table`.
 */
function DBCSCodec(codecOptions, iconv) {
    // Validate first: reading a property of a missing codecOptions would raise a TypeError
    // and the intended error below would never be reached.
    if (!codecOptions)
        throw new Error("DBCS codec is called without the data.")
    this.encodingName = codecOptions.encodingName;
    if (!codecOptions.table)
        throw new Error("Encoding '" + this.encodingName + "' has no data.");

    // Load tables.
    var mappingTable = codecOptions.table();


    // Decode tables: MBCS -> Unicode.

    // decodeTables is a trie, encoded as an array of arrays of integers. Internal arrays are trie nodes and all have len = 256.
    // Trie root is decodeTables[0].
    // Values: >=  0 -> unicode character code. can be > 0xFFFF
    //         == UNASSIGNED -> unknown/unassigned sequence.
    //         == GB18030_CODE -> this is the end of a GB18030 4-byte sequence.
    //         <= NODE_START -> index of the next node in our trie to process next byte.
    //         <= SEQ_START  -> index of the start of a character code sequence, in decodeTableSeq.
    this.decodeTables = [];
    this.decodeTables[0] = UNASSIGNED_NODE.slice(0); // Create root node.

    // Sometimes a MBCS char corresponds to a sequence of unicode chars. We store them as arrays of integers here.
    this.decodeTableSeq = [];

    // Actual mapping tables consist of chunks. Use them to fill up decode tables.
    for (var i = 0; i < mappingTable.length; i++)
        this._addDecodeChunk(mappingTable[i]);

    this.defaultCharUnicode = iconv.defaultCharUnicode;


    // Encode tables: Unicode -> DBCS.

    // `encodeTable` is array mapping from unicode char to encoded char. All its values are integers for performance.
    // Because it can be sparse, it is represented as array of buckets by 256 chars each. Bucket can be null.
    // Values: >=  0 -> it is a normal char. Write the value (if <=256 then 1 byte, if <=65536 then 2 bytes, etc.).
    //         == UNASSIGNED -> no conversion found. Output a default char.
    //         <= SEQ_START  -> it's an index in encodeTableSeq, see below. The character starts a sequence.
    this.encodeTable = [];

    // `encodeTableSeq` is used when a sequence of unicode characters is encoded as a single code. We use a tree of
    // objects where keys correspond to characters in sequence and leafs are the encoded dbcs values. A special DEF_CHAR key
    // means end of sequence (needed when one sequence is a strict subsequence of another).
    // Objects are kept separately from encodeTable to increase performance.
    this.encodeTableSeq = [];

    // Some chars can be decoded, but need not be encoded.
    var skipEncodeChars = {};
    if (codecOptions.encodeSkipVals)
        for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
            var val = codecOptions.encodeSkipVals[i];
            if (typeof val === 'number')
                skipEncodeChars[val] = true;
            else
                for (var j = val.from; j <= val.to; j++)
                    skipEncodeChars[j] = true;
        }

    // Use decode trie to recursively fill out encode tables.
    this._fillEncodeTable(0, 0, skipEncodeChars);

    // Add more encoding pairs when needed.
    if (codecOptions.encodeAdd) {
        for (var uChar in codecOptions.encodeAdd)
            if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar))
                this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]);
    }

    // Single-byte replacement written for unencodable characters: the configured default,
    // then the table's own mapping for '?', then plain ASCII '?'.
    // Buckets are indexed by char code, so '?' must be converted before the lookup.
    this.defCharSB  = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
    if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]["?".charCodeAt(0)];
    if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);


    // Load & create GB18030 tables when needed.
    if (typeof codecOptions.gb18030 === 'function') {
        this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.

        // Add GB18030 decode tables.
        var thirdByteNodeIdx = this.decodeTables.length;
        var thirdByteNode = this.decodeTables[thirdByteNodeIdx] = UNASSIGNED_NODE.slice(0);

        var fourthByteNodeIdx = this.decodeTables.length;
        var fourthByteNode = this.decodeTables[fourthByteNodeIdx] = UNASSIGNED_NODE.slice(0);

        // Lead bytes 0x81..0xFE followed by 0x30..0x39 continue into the shared third-byte node.
        for (var i = 0x81; i <= 0xFE; i++) {
            var secondByteNodeIdx = NODE_START - this.decodeTables[0][i];
            var secondByteNode = this.decodeTables[secondByteNodeIdx];
            for (var j = 0x30; j <= 0x39; j++)
                secondByteNode[j] = NODE_START - thirdByteNodeIdx;
        }
        for (var i = 0x81; i <= 0xFE; i++)
            thirdByteNode[i] = NODE_START - fourthByteNodeIdx;
        for (var i = 0x30; i <= 0x39; i++)
            fourthByteNode[i] = GB18030_CODE
    }
}
120
121
// Encoder and decoder constructors for this codec; both are declared further down in this module.
DBCSCodec.prototype.encoder = DBCSEncoder;
DBCSCodec.prototype.decoder = DBCSDecoder;
123
124
// Decoder helpers
125
/**
 * Finds the trie node that the last byte of `addr` indexes into, creating
 * intermediate nodes for the leading bytes as needed.
 *
 * @param {number} addr - Multibyte code, most significant byte first (e.g. 0x8140).
 * @returns {number[]} The 256-slot node in which the final byte of `addr` is looked up.
 * @throws {Error} When a leading byte already maps to a character or sequence.
 */
DBCSCodec.prototype._getDecodeTrieNode = function(addr) {
    // Split into bytes, least significant first. Work on a copy so that `addr`
    // is still intact for the error message below.
    var bytes = [];
    for (var rest = addr; rest > 0; rest >>= 8)
        bytes.push(rest & 0xFF);
    if (bytes.length == 0)
        bytes.push(0);

    var node = this.decodeTables[0];
    for (var i = bytes.length-1; i > 0; i--) { // Traverse nodes deeper into the trie.
        var val = node[bytes[i]];

        if (val == UNASSIGNED) { // Create new node.
            node[bytes[i]] = NODE_START - this.decodeTables.length;
            this.decodeTables.push(node = UNASSIGNED_NODE.slice(0));
        }
        else if (val <= NODE_START) { // Existing node.
            node = this.decodeTables[NODE_START - val];
        }
        else
            throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16));
    }
    return node;
}
148
149
150
/**
 * Writes one chunk of the mapping table into the decode trie.
 *
 * @param {Array} chunk - `chunk[0]` is the starting multibyte code as a hex string. Each following
 *     element is either a string of characters to assign to consecutive codes, or a number N
 *     meaning "N more codes, continuing from the previously written character code".
 * @throws {Error} On a broken surrogate pair, an element of unsupported type, or a chunk that
 *     runs past the end of its trie node.
 */
DBCSCodec.prototype._addDecodeChunk = function(chunk) {
    // First element of chunk is the hex mbcs code where we start.
    var curAddr = parseInt(chunk[0], 16);

    // Choose the decoding node where we'll write our chars.
    var writeTable = this._getDecodeTrieNode(curAddr);
    curAddr = curAddr & 0xFF; // From here on curAddr is the slot index inside that node.

    // Write all other elements of the chunk to the table.
    for (var k = 1; k < chunk.length; k++) {
        var part = chunk[k];
        if (typeof part === "string") { // String, write as-is.
            for (var l = 0; l < part.length;) {
                var code = part.charCodeAt(l++);
                if (0xD800 <= code && code < 0xDC00) { // Decode surrogate
                    var codeTrail = part.charCodeAt(l++);
                    if (0xDC00 <= codeTrail && codeTrail < 0xE000)
                        writeTable[curAddr++] = 0x10000 + (code - 0xD800) * 0x400 + (codeTrail - 0xDC00);
                    else
                        throw new Error("Incorrect surrogate pair in "  + this.encodingName + " at chunk " + chunk[0]);
                }
                else if (0x0FF0 < code && code <= 0x0FFF) { // Character sequence (our own encoding used)
                    // The marker itself encodes the length: 0x0FFF -> 2 chars, 0x0FFE -> 3 chars, and so on.
                    var len = 0xFFF - code + 2;
                    var seq = [];
                    for (var m = 0; m < len; m++)
                        seq.push(part.charCodeAt(l++)); // Simple variation: don't support surrogates or subsequences in seq.

                    writeTable[curAddr++] = SEQ_START - this.decodeTableSeq.length;
                    this.decodeTableSeq.push(seq);
                }
                else
                    writeTable[curAddr++] = code; // Basic char
            }
        }
        else if (typeof part === "number") { // Integer, meaning increasing sequence starting with prev character.
            var charCode = writeTable[curAddr - 1] + 1;
            for (var l = 0; l < part; l++)
                writeTable[curAddr++] = charCode++;
        }
        else
            throw new Error("Incorrect type '" + typeof part + "' given in "  + this.encodingName + " at chunk " + chunk[0]);
    }
    if (curAddr > 0xFF)
        throw new Error("Incorrect chunk in "  + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
}
195
196
// Encoder helpers
197
/**
 * Returns the encode-table bucket covering `uCode`, creating it on first use.
 *
 * @param {number} uCode - Unicode code point.
 * @returns {number[]} Bucket of 256 slots, indexed by the low byte of `uCode`.
 */
DBCSCodec.prototype._getEncodeBucket = function(uCode) {
    var high = uCode >> 8; // This could be > 0xFF because of astral characters.
    if (this.encodeTable[high] === undefined)
        this.encodeTable[high] = UNASSIGNED_NODE.slice(0); // Create bucket on demand.
    return this.encodeTable[high];
}
203
204
/**
 * Registers `dbcsCode` as the encoding of the single character `uCode`.
 * A mapping that is already present is kept; the first one registered wins.
 *
 * @param {number} uCode - Unicode code point.
 * @param {number} dbcsCode - Multibyte code to emit for it.
 */
DBCSCodec.prototype._setEncodeChar = function(uCode, dbcsCode) {
    var bucket = this._getEncodeBucket(uCode);
    var low = uCode & 0xFF;
    if (bucket[low] <= SEQ_START)
        this.encodeTableSeq[SEQ_START-bucket[low]][DEF_CHAR] = dbcsCode; // There's already a sequence, set a single-char subsequence of it.
    else if (bucket[low] == UNASSIGNED)
        bucket[low] = dbcsCode;
}
212
213
/**
 * Registers `dbcsCode` as the encoding of a whole sequence of unicode characters.
 * The sequence is stored as a path in a tree of objects: the first character selects the
 * root (through encodeTable), every following character is a key one level deeper, and
 * the last character's key holds `dbcsCode`.
 *
 * @param {number[]} seq - Character codes of the sequence.
 * @param {number} dbcsCode - Multibyte code to emit for the sequence.
 */
DBCSCodec.prototype._setEncodeSequence = function(seq, dbcsCode) {

    // Get the root of character tree according to first character of the sequence.
    var uCode = seq[0];
    var bucket = this._getEncodeBucket(uCode);
    var low = uCode & 0xFF;

    var node;
    if (bucket[low] <= SEQ_START) {
        // There's already a sequence starting with this character - use it.
        node = this.encodeTableSeq[SEQ_START-bucket[low]];
    }
    else {
        // There was no sequence object - allocate a new one.
        node = {};
        if (bucket[low] !== UNASSIGNED) node[DEF_CHAR] = bucket[low]; // If a char was set before - make it a single-char subsequence.
        bucket[low] = SEQ_START - this.encodeTableSeq.length;
        this.encodeTableSeq.push(node);
    }

    // Traverse the character tree, allocating new nodes as needed.
    for (var j = 1; j < seq.length-1; j++) {
        // Each level is keyed by the character at that position; the encoder looks nodes up
        // by the next input character, so the key must advance together with j.
        uCode = seq[j];
        var oldVal = node[uCode];
        if (typeof oldVal === 'object')
            node = oldVal;
        else {
            node = node[uCode] = {}
            if (oldVal !== undefined)
                node[DEF_CHAR] = oldVal
        }
    }

    // Set the leaf to given dbcsCode.
    uCode = seq[seq.length-1];
    node[uCode] = dbcsCode;
}
249
250
/**
 * Walks one decode-trie node and registers the reverse (unicode -> multibyte) mapping
 * for everything reachable from it, recursing into child nodes.
 *
 * @param {number} nodeIdx - Index of the node in `this.decodeTables`.
 * @param {number} prefix - Bytes consumed so far, already shifted left to make room for this node's byte.
 * @param {Object} skipEncodeChars - Multibyte codes (as keys) that must not get an encode mapping.
 */
DBCSCodec.prototype._fillEncodeTable = function(nodeIdx, prefix, skipEncodeChars) {
    var node = this.decodeTables[nodeIdx];
    for (var i = 0; i < 0x100; i++) {
        var uCode = node[i];
        var mbCode = prefix + i;
        if (skipEncodeChars[mbCode])
            continue;

        if (uCode >= 0)
            this._setEncodeChar(uCode, mbCode);
        else if (uCode <= NODE_START) // Child node: checked before SEQ_START because NODE_START is the lower bound.
            this._fillEncodeTable(NODE_START - uCode, mbCode << 8, skipEncodeChars);
        else if (uCode <= SEQ_START)
            this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode);
    }
}
266
267
268
269
// == Encoder ==================================================================
270
271
/**
 * Streaming encoder (unicode string -> multibyte buffer).
 *
 * @param {Object} options - Not read by this constructor.
 * @param {Object} codec - Codec instance whose encode tables are shared with this encoder.
 */
function DBCSEncoder(options, codec) {
    // Tables and settings borrowed from the codec; never modified by the encoder.
    this.encodeTable = codec.encodeTable;
    this.encodeTableSeq = codec.encodeTableSeq;
    this.defaultCharSingleByte = codec.defCharSB;
    this.gb18030 = codec.gb18030;

    // Per-stream state kept between write() calls:
    this.leadSurrogate = -1;   // pending lead surrogate, or -1 when there is none
    this.seqObj = undefined;   // node of a character sequence in progress, if any
}
282
283
/**
 * Encodes a chunk of text. A lead surrogate or an unfinished character sequence at the end
 * of `str` is remembered and completed by the next write() or by end().
 *
 * @param {string} str - Text to encode.
 * @returns {Buffer} Encoded bytes for everything that could be resolved in this call.
 */
DBCSEncoder.prototype.write = function(str) {
    var leadSurrogate = this.leadSurrogate,
        seqObj = this.seqObj, nextChar = -1,
        i = 0, j = 0;

    // State left over from the previous call (a lone lead surrogate and/or a sequence prefix) is
    // flushed during this call and produces bytes that str.length does not account for.
    // Reserve room for them: writes past the end of a Buffer are silently dropped.
    var carryBytes = (leadSurrogate !== -1 ? 3 : 0) + (seqObj !== undefined ? 3 : 0);
    var newBuf = Buffer.alloc(str.length * (this.gb18030 ? 4 : 3) + carryBytes);

    while (true) {
        // 0. Get next character.
        var uCode;
        if (nextChar === -1) {
            if (i == str.length) break;
            uCode = str.charCodeAt(i++);
        }
        else {
            uCode = nextChar;
            nextChar = -1;
        }

        // 1. Handle surrogates.
        if (0xD800 <= uCode && uCode < 0xE000) { // Char is one of surrogates.
            if (uCode < 0xDC00) { // We've got lead surrogate.
                if (leadSurrogate === -1) {
                    leadSurrogate = uCode;
                    continue;
                } else {
                    leadSurrogate = uCode;
                    // Double lead surrogate found.
                    uCode = UNASSIGNED;
                }
            } else { // We've got trail surrogate.
                if (leadSurrogate !== -1) {
                    uCode = 0x10000 + (leadSurrogate - 0xD800) * 0x400 + (uCode - 0xDC00);
                    leadSurrogate = -1;
                } else {
                    // Incomplete surrogate pair - only trail surrogate found.
                    uCode = UNASSIGNED;
                }

            }
        }
        else if (leadSurrogate !== -1) {
            // Incomplete surrogate pair - only lead surrogate found.
            nextChar = uCode; uCode = UNASSIGNED; // Write an error, then current char.
            leadSurrogate = -1;
        }

        // 2. Convert uCode character.
        var dbcsCode = UNASSIGNED;
        if (seqObj !== undefined && uCode != UNASSIGNED) { // We are in the middle of the sequence
            var resCode = seqObj[uCode];
            if (typeof resCode === 'object') { // Sequence continues.
                seqObj = resCode;
                continue;

            } else if (typeof resCode == 'number') { // Sequence finished. Write it.
                dbcsCode = resCode;

            } else if (resCode == undefined) { // Current character is not part of the sequence.

                // Try default character for this sequence
                resCode = seqObj[DEF_CHAR];
                if (resCode !== undefined) {
                    dbcsCode = resCode; // Found. Write it.
                    nextChar = uCode; // Current character will be written too in the next iteration.

                } else {
                    // TODO: What if we have no default? (resCode == undefined)
                    // Then, we should write first char of the sequence as-is and try the rest recursively.
                    // Didn't do it for now because no encoding has this situation yet.
                    // Currently, just skip the sequence and write current char.
                }
            }
            seqObj = undefined;
        }
        else if (uCode >= 0) {  // Regular character
            var subtable = this.encodeTable[uCode >> 8];
            if (subtable !== undefined)
                dbcsCode = subtable[uCode & 0xFF];

            if (dbcsCode <= SEQ_START) { // Sequence start
                seqObj = this.encodeTableSeq[SEQ_START-dbcsCode];
                continue;
            }

            if (dbcsCode == UNASSIGNED && this.gb18030) {
                // Use GB18030 algorithm to find character(s) to write.
                var idx = findIdx(this.gb18030.uChars, uCode);
                if (idx != -1) {
                    // Linear pointer -> four bytes: lead 0x81.., digit 0x30.., 0x81.., digit 0x30..
                    dbcsCode = this.gb18030.gbChars[idx] + (uCode - this.gb18030.uChars[idx]);
                    newBuf[j++] = 0x81 + Math.floor(dbcsCode / 12600); dbcsCode = dbcsCode % 12600;
                    newBuf[j++] = 0x30 + Math.floor(dbcsCode / 1260); dbcsCode = dbcsCode % 1260;
                    newBuf[j++] = 0x81 + Math.floor(dbcsCode / 10); dbcsCode = dbcsCode % 10;
                    newBuf[j++] = 0x30 + dbcsCode;
                    continue;
                }
            }
        }

        // 3. Write dbcsCode character.
        if (dbcsCode === UNASSIGNED)
            dbcsCode = this.defaultCharSingleByte;

        if (dbcsCode < 0x100) {
            newBuf[j++] = dbcsCode;
        }
        else if (dbcsCode < 0x10000) {
            newBuf[j++] = dbcsCode >> 8;   // high byte
            newBuf[j++] = dbcsCode & 0xFF; // low byte
        }
        else {
            newBuf[j++] = dbcsCode >> 16;
            newBuf[j++] = (dbcsCode >> 8) & 0xFF;
            newBuf[j++] = dbcsCode & 0xFF;
        }
    }

    // Keep unfinished state for the next call.
    this.seqObj = seqObj;
    this.leadSurrogate = leadSurrogate;
    return newBuf.slice(0, j);
}
402
403
/**
 * Flushes state left over after the last write(): the single-character code of an unfinished
 * sequence (if it has one) and a default character for a dangling lead surrogate.
 *
 * @returns {Buffer|undefined} Remaining bytes, or undefined when nothing was pending.
 */
DBCSEncoder.prototype.end = function() {
    if (this.leadSurrogate === -1 && this.seqObj === undefined)
        return; // All clean. Most often case.

    var newBuf = Buffer.alloc(10), j = 0;

    if (this.seqObj) { // We're in the sequence.
        var dbcsCode = this.seqObj[DEF_CHAR];
        if (dbcsCode !== undefined) { // Write beginning of the sequence.
            // Same byte layout as write() uses, including codes that need three bytes.
            if (dbcsCode < 0x100) {
                newBuf[j++] = dbcsCode;
            }
            else if (dbcsCode < 0x10000) {
                newBuf[j++] = dbcsCode >> 8;   // high byte
                newBuf[j++] = dbcsCode & 0xFF; // low byte
            }
            else {
                newBuf[j++] = dbcsCode >> 16;
                newBuf[j++] = (dbcsCode >> 8) & 0xFF;
                newBuf[j++] = dbcsCode & 0xFF;
            }
        } else {
            // The sequence prefix has no single-character code of its own: nothing is written for it.
        }
        this.seqObj = undefined;
    }

    if (this.leadSurrogate !== -1) {
        // Incomplete surrogate pair - only lead surrogate found.
        newBuf[j++] = this.defaultCharSingleByte;
        this.leadSurrogate = -1;
    }

    return newBuf.slice(0, j);
}
433
434
// Export for testing: exposes the module-level findIdx helper on the encoder prototype.
435
DBCSEncoder.prototype.findIdx = findIdx;
436
437
438
// == Decoder ==================================================================
439
440
/**
 * Streaming decoder (multibyte buffer -> unicode string).
 *
 * @param {Object} options - Not read by this constructor.
 * @param {Object} codec - Codec instance whose decode tables are shared with this decoder.
 */
function DBCSDecoder(options, codec) {
    // Tables and settings borrowed from the codec; never modified by the decoder.
    this.decodeTables = codec.decodeTables;
    this.decodeTableSeq = codec.decodeTableSeq;
    this.defaultCharUnicode = codec.defaultCharUnicode;
    this.gb18030 = codec.gb18030;

    // Per-stream state kept between write() calls:
    this.nodeIdx = 0;                 // current trie node (0 is the root)
    this.prevBuf = Buffer.alloc(0);   // bytes of a character not yet completed
}
451
452
/**
 * Decodes a chunk of bytes. Bytes of a character that is not complete at the end of `buf`
 * are kept in `this.prevBuf` and decoded together with the next chunk.
 *
 * @param {Buffer} buf - Encoded bytes.
 * @returns {string} Text decoded in this call.
 * @throws {Error} When a decode table holds a value outside the known ranges.
 */
DBCSDecoder.prototype.write = function(buf) {
    var nodeIdx = this.nodeIdx,
        prevBuf = this.prevBuf, prevBufOffset = this.prevBuf.length,
        // Output is UTF-16LE, 2 bytes per code unit. Bytes carried over from the previous call are
        // decoded (or re-parsed) here as well, so they count towards the size; writes past the end
        // of a Buffer would be silently dropped.
        newBuf = Buffer.alloc((buf.length + prevBufOffset)*2),
        seqStart = -this.prevBuf.length, // idx of the start of current parsed sequence.
        uCode;

    if (prevBufOffset > 0) // Make prev buf overlap a little to make it easier to slice later.
        prevBuf = Buffer.concat([prevBuf, buf.slice(0, 10)]);

    for (var i = 0, j = 0; i < buf.length; i++) {
        // Negative i addresses bytes carried over from the previous call.
        var curByte = (i >= 0) ? buf[i] : prevBuf[i + prevBufOffset];

        // Lookup in current trie node.
        uCode = this.decodeTables[nodeIdx][curByte];

        if (uCode >= 0) {
            // Normal character, just use it.
        }
        else if (uCode === UNASSIGNED) { // Unknown char.
            // TODO: Callback with seq.
            //var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
            i = seqStart; // Try to parse again, after skipping first byte of the sequence ('i' will be incremented by 'for' cycle).
            uCode = this.defaultCharUnicode.charCodeAt(0);
        }
        else if (uCode === GB18030_CODE) {
            // Rebuild the linear GB18030 pointer from the four bytes, then map it through the ranges.
            var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
            var ptr = (curSeq[0]-0x81)*12600 + (curSeq[1]-0x30)*1260 + (curSeq[2]-0x81)*10 + (curSeq[3]-0x30);
            var idx = findIdx(this.gb18030.gbChars, ptr);
            uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx];
        }
        else if (uCode <= NODE_START) { // Go to next trie node.
            nodeIdx = NODE_START - uCode;
            continue;
        }
        else if (uCode <= SEQ_START) { // Output a sequence of chars.
            var seq = this.decodeTableSeq[SEQ_START - uCode];
            for (var k = 0; k < seq.length - 1; k++) {
                uCode = seq[k];
                newBuf[j++] = uCode & 0xFF;
                newBuf[j++] = uCode >> 8;
            }
            uCode = seq[seq.length-1]; // The last char goes through the common write path below.
        }
        else
            throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte);

        // Write the character to buffer, handling higher planes using surrogate pair.
        if (uCode > 0xFFFF) {
            uCode -= 0x10000;
            var uCodeLead = 0xD800 + Math.floor(uCode / 0x400);
            newBuf[j++] = uCodeLead & 0xFF;
            newBuf[j++] = uCodeLead >> 8;

            uCode = 0xDC00 + uCode % 0x400;
        }
        newBuf[j++] = uCode & 0xFF;
        newBuf[j++] = uCode >> 8;

        // Reset trie node.
        nodeIdx = 0; seqStart = i+1;
    }

    // Remember the unfinished character (if any) for the next call.
    this.nodeIdx = nodeIdx;
    this.prevBuf = (seqStart >= 0) ? buf.slice(seqStart) : prevBuf.slice(seqStart + prevBufOffset);
    return newBuf.slice(0, j).toString('ucs2');
}
519
520
/**
 * Flushes bytes of an unfinished character left over after the last write().
 * The first pending byte is replaced by the default character and the remaining
 * bytes are decoded again from the trie root, until nothing is pending.
 *
 * @returns {string} Text produced from the leftover bytes ('' when there were none).
 */
DBCSDecoder.prototype.end = function() {
    var output = '';

    // write() may leave new pending bytes in this.prevBuf, so re-read it on every pass.
    for (var pending = this.prevBuf; pending.length > 0; pending = this.prevBuf) {
        // The first pending byte cannot be completed any more: emit the default char for it.
        output += this.defaultCharUnicode;
        var rest = pending.slice(1);

        // Start over from a clean state and decode what follows as usual.
        this.prevBuf = Buffer.alloc(0);
        this.nodeIdx = 0;
        if (rest.length > 0)
            output += this.write(rest);
    }

    this.nodeIdx = 0;
    return output;
}
539
540
/**
 * Binary search for GB18030. Returns largest i such that table[i] <= val.
 *
 * @param {number[]} table - Values sorted in ascending order.
 * @param {number} val - Value to locate.
 * @returns {number} Index of the last element <= val, or -1 when there is none
 *     (val is below the first element, or the table is empty).
 */
function findIdx(table, val) {
    if (table.length === 0 || table[0] > val)
        return -1;

    var l = 0, r = table.length;
    while (l < r-1) { // always table[l] <= val < table[r] (with table[table.length] read as +infinity)
        var mid = l + Math.floor((r-l+1)/2); // r-l >= 2 here, so l < mid < r and the range always shrinks.
        if (table[mid] <= val)
            l = mid;
        else
            r = mid;
    }
    return l;
}
555
556