Passed
Push — master ( 2f7ce0...4f126f )
by Rafael S.
02:01
created

lib/utf8-buffer.js (1 issue)

1
/*
2
 * Copyright (c) 2018 Rafael da Silva Rocha.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining
5
 * a copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sublicense, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be
13
 * included in all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 */
24
25
/**
26
 * @fileoverview Functions to serialize and deserialize UTF-8 strings.
27
 * @see https://github.com/rochars/utf8-buffer
28
 * @see https://encoding.spec.whatwg.org/#the-encoding
29
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
30
 */
31
32
import utf8BufferSize from 'utf8-buffer-size';
33
34
/**
 * Read a string of UTF-8 characters from a byte buffer.
 * Decoding follows the WHATWG UTF-8 decoder: each ill-formed sequence
 * (invalid lead byte, overlong form, encoded surrogate, code point above
 * U+10FFFF or truncated sequence) is replaced with a single
 * 'REPLACEMENT CHARACTER' (U+FFFD), and the byte that broke the sequence
 * is not consumed, so it is decoded again on its own.
 * @see https://encoding.spec.whatwg.org/#the-encoding
 * @see https://stackoverflow.com/a/34926911
 * @param {!Uint8Array|!Array<!number>} buffer A byte buffer.
 * @param {number=} index The index to start reading from.
 * @param {?number=} len The number of bytes to read.
 *    If len is undefined will read until the end of the buffer.
 *    Reading never goes past the end of the buffer.
 * @return {string}
 */
export function unpack(buffer, index=0, len=undefined) {
  // Clamp the end offset so an oversized len cannot read missing bytes.
  /** @type {number} */
  const end = Math.min(
    len !== undefined ? index + len : buffer.length, buffer.length);
  /** @type {string} */
  let str = '';
  while (index < end) {
    /** @type {number} */
    const leadByte = buffer[index++];
    if (leadByte >= 0x00 && leadByte <= 0x7F) {
      str += String.fromCharCode(leadByte);
      continue;
    }
    // Allowed range for the FIRST continuation byte. Some lead bytes
    // narrow it to rule out overlong forms, surrogates and > U+10FFFF.
    /** @type {number} */
    let lowerBoundary = 0x80;
    /** @type {number} */
    let upperBoundary = 0xBF;
    /** @type {number} */
    let count = 0;
    if (leadByte >= 0xC2 && leadByte <= 0xDF) {
      count = 1;
    } else if (leadByte >= 0xE0 && leadByte <= 0xEF) {
      count = 2;
      if (leadByte === 0xE0) {
        lowerBoundary = 0xA0;
      } else if (leadByte === 0xED) {
        upperBoundary = 0x9F;
      }
    } else if (leadByte >= 0xF0 && leadByte <= 0xF4) {
      count = 3;
      if (leadByte === 0xF0) {
        lowerBoundary = 0x90;
      } else if (leadByte === 0xF4) {
        upperBoundary = 0x8F;
      }
    } else {
      // 0x80-0xC1 and 0xF5-0xFF can never start a sequence.
      str += String.fromCharCode(0xFFFD);
      continue;
    }
    // Keep only the payload bits of the lead byte.
    /** @type {number} */
    let codePoint = leadByte & ((1 << (7 - count)) - 1);
    /** @type {boolean} */
    let valid = true;
    for (let i = 0; i < count; i++) {
      if (index >= end ||
          buffer[index] < lowerBoundary ||
          buffer[index] > upperBoundary) {
        // Truncated or invalid: leave the offending byte unread so the
        // outer loop decodes it on its own.
        valid = false;
        break;
      }
      codePoint = (codePoint << 6) | (buffer[index] & 0x3F);
      index++;
      // Only the first continuation byte has a narrowed range.
      lowerBoundary = 0x80;
      upperBoundary = 0xBF;
    }
    str += valid ?
      String.fromCodePoint(codePoint) : String.fromCharCode(0xFFFD);
  }
  return str;
}
107
108
/**
 * Write a string of UTF-8 characters as a byte buffer.
 * The string is walked by code point, so a surrogate pair becomes one
 * four-byte sequence. An unpaired surrogate has no UTF-8 form and is
 * written as 'REPLACEMENT CHARACTER' (U+FFFD, bytes EF BF BD).
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 * @param {string} str The string to pack.
 * @return {!Uint8Array} The packed string. A plain Array of byte values
 *    is returned instead when Uint8Array is not available.
 * @suppress {checkTypes}
 */
export function pack(str) {
  /** @type {!Uint8Array} */
  let bytes;
  if (typeof Uint8Array !== 'undefined') {
    bytes = new Uint8Array(utf8BufferSize(str));
  } else {
    bytes = [];
  }
  /** @type {number} */
  let bufferIndex = 0;
  // String iteration yields whole code points, so no manual index skip
  // is needed for characters outside the BMP.
  for (const char of str) {
    /** @type {number} */
    let codePoint = char.codePointAt(0);
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      // Unpaired surrogate: not a scalar value, cannot be encoded.
      codePoint = 0xFFFD;
    }
    if (codePoint < 0x80) {
      bytes[bufferIndex++] = codePoint;
      continue;
    }
    // count is the number of continuation bytes; offset is the marker
    // bits of the lead byte.
    /** @type {number} */
    let count;
    /** @type {number} */
    let offset;
    if (codePoint <= 0x07FF) {
      count = 1;
      offset = 0xC0;
    } else if (codePoint <= 0xFFFF) {
      count = 2;
      offset = 0xE0;
    } else {
      count = 3;
      offset = 0xF0;
    }
    bytes[bufferIndex++] = (codePoint >> (6 * count)) + offset;
    while (count > 0) {
      bytes[bufferIndex++] =
        0x80 | ((codePoint >> (6 * (count - 1))) & 0x3F);
      count--;
    }
  }
  return bytes;
}
157