Passed
Push — master ( 88ade9...ab862a )
by Rafael S.
02:21
created

utf8-parser.js ➔ pack   B

Complexity

Conditions 7

Size

Total Lines 39
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 29
dl 0
loc 39
rs 7.784
c 0
b 0
f 0
1
/*
2
 * Copyright (c) 2018 Rafael da Silva Rocha.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining
5
 * a copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sublicense, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be
13
 * included in all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 */
24
25
/**
26
 * @fileoverview Functions to serialize and deserialize UTF-8 strings.
27
 * @see https://github.com/rochars/byte-data
28
 * @see https://encoding.spec.whatwg.org/#the-encoding
29
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
30
 */
31
32
/**
 * Read a string of UTF-8 characters from a byte buffer.
 * Decoding follows the WHATWG UTF-8 decoder: every ill-formed sequence
 * (stray continuation byte, overlong form, encoded surrogate, value above
 * U+10FFFF, or a sequence cut short by the end of the range) yields one
 * 'REPLACEMENT CHARACTER' (U+FFFD), and the byte that broke the sequence
 * is decoded again on its own instead of being swallowed.
 * @see https://encoding.spec.whatwg.org/#the-encoding
 * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 * @param {!Uint8Array|!Array<number>} buffer A byte buffer.
 * @param {number=} start The buffer index to start reading.
 * @param {?number=} end The buffer index to stop reading (exclusive).
 *   Assumes the buffer length if undefined; values past the end of the
 *   buffer are clamped to the buffer length.
 * @return {string}
 */
export function unpack(buffer, start=0, end=buffer.length) {
  // Never read past the real end of the buffer.
  /** @type {number} */
  const limit = Math.min(end, buffer.length);
  /** @type {string} */
  let str = '';
  /** @type {number} */
  let index = start;
  while (index < limit) {
    /** @type {number} */
    const lead = buffer[index++];
    if (lead >= 0x00 && lead <= 0x7F) {
      str += String.fromCharCode(lead);
      continue;
    }
    // Allowed range for the NEXT continuation byte. Some lead bytes narrow
    // it for the first continuation byte only, which is what rejects
    // overlong forms, surrogates and code points above U+10FFFF.
    /** @type {number} */
    let lowerBoundary = 0x80;
    /** @type {number} */
    let upperBoundary = 0xBF;
    /** @type {number} */
    let needed = 0;
    if (lead >= 0xC2 && lead <= 0xDF) {
      needed = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      needed = 2;
      if (lead === 0xE0) {
        lowerBoundary = 0xA0;
      } else if (lead === 0xED) {
        upperBoundary = 0x9F;
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      needed = 3;
      if (lead === 0xF0) {
        lowerBoundary = 0x90;
      } else if (lead === 0xF4) {
        upperBoundary = 0x8F;
      }
    } else {
      // Stray continuation byte or a byte that can never start a sequence.
      str += String.fromCharCode(0xFFFD);
      continue;
    }
    // Keep only the payload bits of the lead byte.
    /** @type {number} */
    let codePoint = lead & ((1 << (7 - needed)) - 1);
    /** @type {boolean} */
    let valid = true;
    for (let seen = 0; seen < needed; seen++) {
      /** @type {number|undefined} */
      const next = index < limit ? buffer[index] : undefined;
      // Written as a negated range test so that undefined/NaN also fail.
      if (!(next >= lowerBoundary && next <= upperBoundary)) {
        // Do not consume the offending byte: the outer loop decodes it.
        valid = false;
        break;
      }
      codePoint = (codePoint << 6) | (next & 0x3F);
      index++;
      // Only the first continuation byte has a restricted range.
      lowerBoundary = 0x80;
      upperBoundary = 0xBF;
    }
    str += valid ?
      String.fromCodePoint(codePoint) : String.fromCharCode(0xFFFD);
  }
  return str;
}
104
105
/**
 * Write a string of UTF-8 characters to a byte buffer.
 * The string is walked by code point, so a valid surrogate pair becomes a
 * single 4-byte sequence. An unpaired surrogate cannot be represented in
 * well-formed UTF-8 and is written as 'REPLACEMENT CHARACTER' (U+FFFD),
 * i.e. the bytes EF BF BD.
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 * @param {string} str The string to pack.
 * @param {!Uint8Array|!Array<number>} buffer The buffer to pack the string to.
 * @param {number=} index The buffer index to start writing.
 * @return {number} The next index to write in the buffer.
 */
export function pack(str, buffer, index=0) {
  /** @type {number} */
  let offset = index;
  // The string iterator yields whole code points; a surrogate that is not
  // part of a valid pair is yielded alone.
  for (const symbol of str) {
    /** @type {number} */
    let codePoint = symbol.codePointAt(0);
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      // Unpaired surrogate: not a Unicode scalar value.
      codePoint = 0xFFFD;
    }
    if (codePoint < 0x80) {
      buffer[offset++] = codePoint;
      continue;
    }
    // Number of continuation bytes and the marker bits of the lead byte.
    /** @type {number} */
    let trailing = 3;
    /** @type {number} */
    let leadMarker = 0xF0;
    if (codePoint <= 0x07FF) {
      trailing = 1;
      leadMarker = 0xC0;
    } else if (codePoint <= 0xFFFF) {
      trailing = 2;
      leadMarker = 0xE0;
    }
    buffer[offset++] = (codePoint >> (6 * trailing)) + leadMarker;
    // Emit the remaining payload six bits at a time, most significant first.
    for (let shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
      buffer[offset++] = 0x80 | ((codePoint >> shift) & 0x3F);
    }
  }
  return offset;
}
152