/*
 * Copyright (c) 2018 Rafael da Silva Rocha.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
24
|
|
|
|
25
|
|
|
/**
 * @fileoverview Functions to serialize and deserialize UTF-8 strings.
 * @see https://github.com/rochars/utf8-buffer
 * @see https://encoding.spec.whatwg.org/#the-encoding
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 */
31
|
|
|
|
32
|
|
|
import utf8BufferSize from 'utf8-buffer-size'; |
33
|
|
|
|
34
|
|
|
/**
 * Read a string of UTF-8 characters from a byte buffer.
 * Decoding follows the WHATWG UTF-8 decoder: every invalid or truncated
 * sequence is replaced with one 'REPLACEMENT CHARACTER' (U+FFFD), and the
 * byte that broke a sequence is not consumed, so it is decoded again as
 * the start of the next sequence.
 * @see https://encoding.spec.whatwg.org/#the-encoding
 * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 * @see https://stackoverflow.com/a/34926911
 * @param {!Uint8Array|!Array<!number>} buffer A byte buffer.
 * @param {number=} index The index to read.
 * @param {?number=} len The number of bytes to read.
 *    If len is undefined or null will read until the end of the buffer.
 *    Reads never go past the end of the buffer.
 * @return {string}
 */
export function unpack(buffer, index=0, len=undefined) {
  // Exclusive end offset, clamped so a too-large len cannot read
  // undefined entries past the end of the buffer.
  /** @type {number} */
  const end = len == null ?
    buffer.length : Math.min(index + len, buffer.length);
  /** @type {string} */
  let str = '';
  /** @type {number} */
  let pos = index;
  while (pos < end) {
    /** @type {number} */
    const leadByte = buffer[pos++];
    if (leadByte >= 0x00 && leadByte <= 0x7F) {
      // Single-byte (ASCII) sequence.
      str += String.fromCharCode(leadByte);
      continue;
    }
    // Number of continuation bytes expected after the lead byte;
    // stays 0 when the lead byte cannot start a sequence.
    /** @type {number} */
    let count = 0;
    // Allowed range of the FIRST continuation byte. Some lead bytes
    // narrow it to reject overlong forms (0xE0, 0xF0), surrogates
    // (0xED) and code points above U+10FFFF (0xF4).
    /** @type {number} */
    let lowerBoundary = 0x80;
    /** @type {number} */
    let upperBoundary = 0xBF;
    if (leadByte >= 0xC2 && leadByte <= 0xDF) {
      count = 1;
    } else if (leadByte >= 0xE0 && leadByte <= 0xEF) {
      count = 2;
      if (leadByte === 0xE0) {
        lowerBoundary = 0xA0;
      } else if (leadByte === 0xED) {
        upperBoundary = 0x9F;
      }
    } else if (leadByte >= 0xF0 && leadByte <= 0xF4) {
      count = 3;
      if (leadByte === 0xF0) {
        lowerBoundary = 0x90;
      } else if (leadByte === 0xF4) {
        upperBoundary = 0x8F;
      }
    }
    // Keep only the payload bits of the lead byte.
    /** @type {number} */
    let codePoint = leadByte & ((1 << (7 - count)) - 1);
    /** @type {boolean} */
    let valid = count > 0;
    for (let i = 0; valid && i < count; i++) {
      // The negated range test also rejects a missing (undefined) byte.
      if (pos >= end ||
          !(buffer[pos] >= lowerBoundary && buffer[pos] <= upperBoundary)) {
        // Leave the offending byte unread: it may start a new sequence.
        valid = false;
      } else {
        codePoint = (codePoint << 6) | (buffer[pos] & 0x3F);
        pos++;
        // Only the first continuation byte has a narrowed range.
        lowerBoundary = 0x80;
        upperBoundary = 0xBF;
      }
    }
    // fromCodePoint emits a surrogate pair for code points > U+FFFF.
    str += valid ? String.fromCodePoint(codePoint) : '\uFFFD';
  }
  return str;
}
107
|
|
|
|
108
|
|
|
/**
 * Write a string of UTF-8 characters as a byte buffer.
 * Unpaired surrogates in the string cannot be represented in UTF-8 and
 * are encoded as 'REPLACEMENT CHARACTER' (U+FFFD), which has the same
 * 3-byte length a surrogate would have taken.
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 * @param {string} str The string to pack.
 * @return {!Uint8Array} The packed string. A plain Array of byte values
 *    is returned instead when Uint8Array is not available.
 * @suppress {checkTypes}
 */
export function pack(str) {
  /** @type {!Uint8Array} */
  const bytes = typeof Uint8Array !== 'undefined' ?
    new Uint8Array(utf8BufferSize(str)) : [];
  /** @type {number} */
  let bufferIndex = 0;
  for (let i = 0, len = str.length; i < len; i++) {
    /** @type {number} */
    let codePoint = str.codePointAt(i);
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      // codePointAt only returns a surrogate value when it is unpaired.
      codePoint = 0xFFFD;
    }
    if (codePoint < 0x80) {
      // Single-byte (ASCII) sequence.
      bytes[bufferIndex++] = codePoint;
      continue;
    }
    // count: number of continuation bytes; offset: lead byte marker bits.
    /** @type {number} */
    let count = 0;
    /** @type {number} */
    let offset = 0;
    if (codePoint <= 0x07FF) {
      count = 1;
      offset = 0xC0;
    } else if (codePoint <= 0xFFFF) {
      count = 2;
      offset = 0xE0;
    } else {
      count = 3;
      offset = 0xF0;
      // The code point used two UTF-16 code units; skip the low surrogate.
      i++;
    }
    bytes[bufferIndex++] = (codePoint >> (6 * count)) + offset;
    // Continuation bytes, most significant 6-bit group first.
    for (let shift = 6 * (count - 1); shift >= 0; shift -= 6) {
      bytes[bufferIndex++] = 0x80 | ((codePoint >> shift) & 0x3F);
    }
  }
  return bytes;
}
157
|
|
|
|