Passed
Push — master ( 13b350...038bd4 )
by Rafael S.
03:46
created

utf8-parser.js ➔ unpack   F

Complexity

Conditions 14

Size

Total Lines 61
Code Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 14
eloc 43
dl 0
loc 61
rs 3.6
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive fragment of the body into its own, well-named function.

Complexity

Complex classes like utf8-parser.js ➔ unpack often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
/*
2
 * Copyright (c) 2018 Rafael da Silva Rocha.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining
5
 * a copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sublicense, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be
13
 * included in all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 */
24
25
/**
26
 * @fileoverview Encode and decode UTF8 strings to and from byte buffers.
27
 * @see https://github.com/rochars/byte-data
28
 * @see https://github.com/rochars/wavefile
29
 * @see https://encoding.spec.whatwg.org/#the-encoding
30
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
31
 */
32
33
/**
 * Read a string of UTF-8 characters from a byte buffer.
 * Follows the WHATWG UTF-8 decoder: every ill-formed sequence (stray
 * continuation byte, overlong form, encoded surrogate, value above
 * U+10FFFF, or a sequence cut short by `end`) yields exactly one
 * 'REPLACEMENT CHARACTER' (U+FFFD), and the byte that broke a sequence is
 * decoded again on its own instead of being swallowed.
 * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 * @see https://stackoverflow.com/a/34926911
 * @param {!Uint8Array|!Array<number>} buffer A byte buffer.
 * @param {number} [start=0] The buffer index to start reading.
 * @param {number} [end=buffer.length] The buffer index to stop reading
 *   (exclusive). Assumes the buffer length if undefined.
 * @return {string} The decoded string.
 */
export function unpack(buffer, start=0, end=buffer.length) {
  /** @type {string} */
  let str = '';
  /** @type {number} */
  let index = start;
  while (index < end) {
    /** @type {number} */
    const lead = buffer[index++];
    // Single-byte (ASCII) fast path.
    if (lead >= 0x00 && lead <= 0x7F) {
      str += String.fromCharCode(lead);
      continue;
    }
    /** @type {number} Continuation bytes still expected. */
    let needed = 0;
    /** @type {number} Payload bits collected so far. */
    let codePoint = 0;
    // Allowed range for the NEXT continuation byte. The range is narrowed
    // according to the LEAD byte to reject overlong forms (E0, F0),
    // surrogates (ED) and values above U+10FFFF (F4).
    /** @type {number} */
    let lowerBoundary = 0x80;
    /** @type {number} */
    let upperBoundary = 0xBF;
    if (lead >= 0xC2 && lead <= 0xDF) {
      needed = 1;
      codePoint = lead & 0x1F;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      needed = 2;
      codePoint = lead & 0x0F;
      if (lead === 0xE0) {
        lowerBoundary = 0xA0;
      } else if (lead === 0xED) {
        upperBoundary = 0x9F;
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      needed = 3;
      codePoint = lead & 0x07;
      if (lead === 0xF0) {
        lowerBoundary = 0x90;
      } else if (lead === 0xF4) {
        upperBoundary = 0x8F;
      }
    } else {
      // 0x80..0xC1, 0xF5..0xFF, or a non-byte value: never a valid lead.
      str += '\uFFFD';
      continue;
    }
    /** @type {boolean} */
    let valid = true;
    for (; needed > 0; needed--) {
      // Sequence truncated by `end`: do not read beyond the requested range.
      if (index >= end) {
        valid = false;
        break;
      }
      /** @type {number} */
      const next = buffer[index];
      // Written as a negated in-range test so `undefined` is rejected too.
      if (!(next >= lowerBoundary && next <= upperBoundary)) {
        // Leave `next` unconsumed so the outer loop decodes it again.
        valid = false;
        break;
      }
      // The narrowed range only applies to the first continuation byte.
      lowerBoundary = 0x80;
      upperBoundary = 0xBF;
      codePoint = (codePoint << 6) | (next & 0x3F);
      index++;
    }
    str += valid ? String.fromCodePoint(codePoint) : '\uFFFD';
  }
  return str;
}
105
106
/**
 * Write a string of UTF-8 characters to a byte buffer.
 * The string is read code point by code point; a lone (unpaired)
 * surrogate is not a Unicode scalar value and cannot be represented in
 * UTF-8, so it is written as 'REPLACEMENT CHARACTER' (U+FFFD).
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 * @param {string} str The string to pack.
 * @param {!Uint8Array|!Array<number>} buffer The buffer to pack the string to.
 * @param {number=} index The buffer index to start writing.
 * @return {number} The next index to write in the buffer.
 */
export function pack(str, buffer, index=0) {
  /** @type {number} */
  const len = str.length;
  for (let i = 0; i < len; i++) {
    /** @type {number} */
    let codePoint = str.codePointAt(i);
    // codePointAt() returns the bare surrogate when it is unpaired.
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      codePoint = 0xFFFD;
    }
    if (codePoint < 0x80) {
      buffer[index++] = codePoint;
      continue;
    }
    /** @type {number} Number of continuation bytes. */
    let count = 0;
    /** @type {number} Marker bits of the lead byte. */
    let offset = 0;
    if (codePoint <= 0x07FF) {
      count = 1;
      offset = 0xC0;
    } else if (codePoint <= 0xFFFF) {
      count = 2;
      offset = 0xE0;
    } else {
      count = 3;
      offset = 0xF0;
      // Astral code points take two UTF-16 units: skip the low surrogate.
      i++;
    }
    // Lead byte: marker bits plus the most significant payload bits.
    buffer[index++] = (codePoint >> (6 * count)) + offset;
    // Continuation bytes: 10xxxxxx, six payload bits each, high to low.
    for (; count > 0; count--) {
      buffer[index++] = 0x80 | ((codePoint >> (6 * (count - 1))) & 0x3F);
    }
  }
  return index;
}
153