1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* webtrees: online genealogy |
5
|
|
|
* Copyright (C) 2021 webtrees development team |
6
|
|
|
* This program is free software: you can redistribute it and/or modify |
7
|
|
|
* it under the terms of the GNU General Public License as published by |
8
|
|
|
* the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
* (at your option) any later version. |
10
|
|
|
* This program is distributed in the hope that it will be useful, |
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13
|
|
|
* GNU General Public License for more details. |
14
|
|
|
* You should have received a copy of the GNU General Public License |
15
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>. |
16
|
|
|
*/ |
17
|
|
|
|
18
|
|
|
declare(strict_types=1); |
19
|
|
|
|
20
|
|
|
namespace Fisharebest\Webtrees\Encodings; |
21
|
|
|
|
22
|
|
|
use InvalidArgumentException; |
23
|
|
|
|
24
|
|
|
use function chr; |
25
|
|
|
use function mb_substitute_character; |
26
|
|
|
use function preg_replace; |
27
|
|
|
|
28
|
|
|
/** |
29
|
|
|
* Convert between (potentially invalid) UTF-8 and UTF-8. |
30
|
|
|
*/ |
31
|
|
|
class UTF8 extends AbstractEncoding |
32
|
|
|
{ |
33
|
|
|
public const NAME = 'UTF-8'; |
34
|
|
|
|
35
|
|
|
public const START_OF_STRING = "\u{0098}"; |
36
|
|
|
public const STRING_TERMINATOR = "\u{009C}"; |
37
|
|
|
public const NO_BREAK_SPACE = "\u{00A0}"; |
38
|
|
|
public const INVERTED_EXCLAMATION_MARK = "\u{00A1}"; |
39
|
|
|
public const CENT_SIGN = "\u{00A2}"; |
40
|
|
|
public const POUND_SIGN = "\u{00A3}"; |
41
|
|
|
public const CURRENCY_SIGN = "\u{00A4}"; |
42
|
|
|
public const YEN_SIGN = "\u{00A5}"; |
43
|
|
|
public const BROKEN_BAR = "\u{00A6}"; |
44
|
|
|
public const SECTION_SIGN = "\u{00A7}"; |
45
|
|
|
public const DIAERESIS = "\u{00A8}"; |
46
|
|
|
public const COPYRIGHT_SIGN = "\u{00A9}"; |
47
|
|
|
public const FEMININE_ORDINAL_INDICATOR = "\u{00AA}"; |
48
|
|
|
public const LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK = "\u{00AB}"; |
49
|
|
|
public const NOT_SIGN = "\u{00AC}"; |
50
|
|
|
public const SOFT_HYPHEN = "\u{00AD}"; |
51
|
|
|
public const REGISTERED_SIGN = "\u{00AE}"; |
52
|
|
|
public const MACRON = "\u{00AF}"; |
53
|
|
|
public const DEGREE_SIGN = "\u{00B0}"; |
54
|
|
|
public const PLUS_MINUS_SIGN = "\u{00B1}"; |
55
|
|
|
public const SUPERSCRIPT_TWO = "\u{00B2}"; |
56
|
|
|
public const SUPERSCRIPT_THREE = "\u{00B3}"; |
57
|
|
|
public const ACUTE_ACCENT = "\u{00B4}"; |
58
|
|
|
public const MICRO_SIGN = "\u{00B5}"; |
59
|
|
|
public const PILCROW_SIGN = "\u{00B6}"; |
60
|
|
|
public const MIDDLE_DOT = "\u{00B7}"; |
61
|
|
|
public const CEDILLA = "\u{00B8}"; |
62
|
|
|
public const SUPERSCRIPT_ONE = "\u{00B9}"; |
63
|
|
|
public const MASCULINE_ORDINAL_INDICATOR = "\u{00BA}"; |
64
|
|
|
public const RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK = "\u{00BB}"; |
65
|
|
|
public const VULGAR_FRACTION_ONE_QUARTER = "\u{00BC}"; |
66
|
|
|
public const VULGAR_FRACTION_ONE_HALF = "\u{00BD}"; |
67
|
|
|
public const VULGAR_FRACTION_THREE_QUARTERS = "\u{00BE}"; |
68
|
|
|
public const INVERTED_QUESTION_MARK = "\u{00BF}"; |
69
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_GRAVE = "\u{00C0}"; |
70
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_ACUTE = "\u{00C1}"; |
71
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX = "\u{00C2}"; |
72
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_TILDE = "\u{00C3}"; |
73
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS = "\u{00C4}"; |
74
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE = "\u{00C5}"; |
75
|
|
|
public const LATIN_CAPITAL_LETTER_AE = "\u{00C6}"; |
76
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA = "\u{00C7}"; |
77
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_GRAVE = "\u{00C8}"; |
78
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_ACUTE = "\u{00C9}"; |
79
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX = "\u{00CA}"; |
80
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS = "\u{00CB}"; |
81
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_GRAVE = "\u{00CC}"; |
82
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_ACUTE = "\u{00CD}"; |
83
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX = "\u{00CE}"; |
84
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS = "\u{00CF}"; |
85
|
|
|
public const LATIN_CAPITAL_LETTER_ETH = "\u{00D0}"; |
86
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_TILDE = "\u{00D1}"; |
87
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_GRAVE = "\u{00D2}"; |
88
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_ACUTE = "\u{00D3}"; |
89
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX = "\u{00D4}"; |
90
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_TILDE = "\u{00D5}"; |
91
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = "\u{00D6}"; |
92
|
|
|
public const MULTIPLICATION_SIGN = "\u{00D7}"; |
93
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_STROKE = "\u{00D8}"; |
94
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_GRAVE = "\u{00D9}"; |
95
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_ACUTE = "\u{00DA}"; |
96
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX = "\u{00DB}"; |
97
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS = "\u{00DC}"; |
98
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_ACUTE = "\u{00DD}"; |
99
|
|
|
public const LATIN_CAPITAL_LETTER_THORN = "\u{00DE}"; |
100
|
|
|
public const LATIN_SMALL_LETTER_SHARP_S = "\u{00DF}"; |
101
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_GRAVE = "\u{00E0}"; |
102
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_ACUTE = "\u{00E1}"; |
103
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX = "\u{00E2}"; |
104
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_TILDE = "\u{00E3}"; |
105
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS = "\u{00E4}"; |
106
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE = "\u{00E5}"; |
107
|
|
|
public const LATIN_SMALL_LETTER_AE = "\u{00E6}"; |
108
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_CEDILLA = "\u{00E7}"; |
109
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_GRAVE = "\u{00E8}"; |
110
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_ACUTE = "\u{00E9}"; |
111
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX = "\u{00EA}"; |
112
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_DIAERESIS = "\u{00EB}"; |
113
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_GRAVE = "\u{00EC}"; |
114
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_ACUTE = "\u{00ED}"; |
115
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX = "\u{00EE}"; |
116
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS = "\u{00EF}"; |
117
|
|
|
public const LATIN_SMALL_LETTER_ETH = "\u{00F0}"; |
118
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_TILDE = "\u{00F1}"; |
119
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_GRAVE = "\u{00F2}"; |
120
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_ACUTE = "\u{00F3}"; |
121
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX = "\u{00F4}"; |
122
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_TILDE = "\u{00F5}"; |
123
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS = "\u{00F6}"; |
124
|
|
|
public const DIVISION_SIGN = "\u{00F7}"; |
125
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_STROKE = "\u{00F8}"; |
126
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_GRAVE = "\u{00F9}"; |
127
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_ACUTE = "\u{00FA}"; |
128
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX = "\u{00FB}"; |
129
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS = "\u{00FC}"; |
130
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_ACUTE = "\u{00FD}"; |
131
|
|
|
public const LATIN_SMALL_LETTER_THORN = "\u{00FE}"; |
132
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = "\u{00FF}"; |
133
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_MACRON = "\u{0100}"; |
134
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_MACRON = "\u{0101}"; |
135
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE = "\u{0102}"; |
136
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE = "\u{0103}"; |
137
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_OGONEK = "\u{0104}"; |
138
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_OGONEK = "\u{0105}"; |
139
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_ACUTE = "\u{0106}"; |
140
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_ACUTE = "\u{0107}"; |
141
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX = "\u{0108}"; |
142
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX = "\u{0109}"; |
143
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE = "\u{010A}"; |
144
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE = "\u{010B}"; |
145
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_CARON = "\u{010C}"; |
146
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_CARON = "\u{010D}"; |
147
|
|
|
public const LATIN_CAPITAL_LETTER_D_WITH_CARON = "\u{010E}"; |
148
|
|
|
public const LATIN_SMALL_LETTER_D_WITH_CARON = "\u{010F}"; |
149
|
|
|
public const LATIN_CAPITAL_LETTER_D_WITH_STROKE = "\u{0110}"; |
150
|
|
|
public const LATIN_SMALL_LETTER_D_WITH_STROKE = "\u{0111}"; |
151
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_MACRON = "\u{0112}"; |
152
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_MACRON = "\u{0113}"; |
153
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_BREVE = "\u{0114}"; |
154
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_BREVE = "\u{0115}"; |
155
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE = "\u{0116}"; |
156
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE = "\u{0117}"; |
157
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_OGONEK = "\u{0118}"; |
158
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_OGONEK = "\u{0119}"; |
159
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CARON = "\u{011A}"; |
160
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CARON = "\u{011B}"; |
161
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX = "\u{011C}"; |
162
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX = "\u{011D}"; |
163
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_BREVE = "\u{011E}"; |
164
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_BREVE = "\u{011F}"; |
165
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE = "\u{0120}"; |
166
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE = "\u{0121}"; |
167
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_CEDILLA = "\u{0122}"; |
168
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_CEDILLA = "\u{0123}"; |
169
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX = "\u{0124}"; |
170
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX = "\u{0125}"; |
171
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_STROKE = "\u{0126}"; |
172
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_STROKE = "\u{0127}"; |
173
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_TILDE = "\u{0128}"; |
174
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_TILDE = "\u{0129}"; |
175
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_MACRON = "\u{012A}"; |
176
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_MACRON = "\u{012B}"; |
177
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_BREVE = "\u{012C}"; |
178
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_BREVE = "\u{012D}"; |
179
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_OGONEK = "\u{012E}"; |
180
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_OGONEK = "\u{012F}"; |
181
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = "\u{0130}"; |
182
|
|
|
public const LATIN_SMALL_LETTER_DOTLESS_I = "\u{0131}"; |
183
|
|
|
public const LATIN_CAPITAL_LIGATURE_IJ = "\u{0132}"; |
184
|
|
|
public const LATIN_SMALL_LIGATURE_IJ = "\u{0133}"; |
185
|
|
|
public const LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX = "\u{0134}"; |
186
|
|
|
public const LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX = "\u{0135}"; |
187
|
|
|
public const LATIN_CAPITAL_LETTER_K_WITH_CEDILLA = "\u{0136}"; |
188
|
|
|
public const LATIN_SMALL_LETTER_K_WITH_CEDILLA = "\u{0137}"; |
189
|
|
|
public const LATIN_SMALL_LETTER_KRA = "\u{0138}"; |
190
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_ACUTE = "\u{0139}"; |
191
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_ACUTE = "\u{013A}"; |
192
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_CEDILLA = "\u{013B}"; |
193
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_CEDILLA = "\u{013C}"; |
194
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_CARON = "\u{013D}"; |
195
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_CARON = "\u{013E}"; |
196
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_MIDDLE_DOT = "\u{013F}"; |
197
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_MIDDLE_DOT = "\u{0140}"; |
198
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_STROKE = "\u{0141}"; |
199
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_STROKE = "\u{0142}"; |
200
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_ACUTE = "\u{0143}"; |
201
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_ACUTE = "\u{0144}"; |
202
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_CEDILLA = "\u{0145}"; |
203
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_CEDILLA = "\u{0146}"; |
204
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_CARON = "\u{0147}"; |
205
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_CARON = "\u{0148}"; |
206
|
|
|
public const LATIN_SMALL_LETTER_N_PRECEDED_BY_APOSTROPHE = "\u{0149}"; |
207
|
|
|
public const LATIN_CAPITAL_LETTER_ENG = "\u{014A}"; |
208
|
|
|
public const LATIN_SMALL_LETTER_ENG = "\u{014B}"; |
209
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_MACRON = "\u{014C}"; |
210
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_MACRON = "\u{014D}"; |
211
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_BREVE = "\u{014E}"; |
212
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_BREVE = "\u{014F}"; |
213
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE = "\u{0150}"; |
214
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE = "\u{0151}"; |
215
|
|
|
public const LATIN_CAPITAL_LIGATURE_OE = "\u{0152}"; |
216
|
|
|
public const LATIN_SMALL_LIGATURE_OE = "\u{0153}"; |
217
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_ACUTE = "\u{0154}"; |
218
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_ACUTE = "\u{0155}"; |
219
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_CEDILLA = "\u{0156}"; |
220
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_CEDILLA = "\u{0157}"; |
221
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_CARON = "\u{0158}"; |
222
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_CARON = "\u{0159}"; |
223
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE = "\u{015A}"; |
224
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_ACUTE = "\u{015B}"; |
225
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX = "\u{015C}"; |
226
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX = "\u{015D}"; |
227
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_CEDILLA = "\u{015E}"; |
228
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_CEDILLA = "\u{015F}"; |
229
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_CARON = "\u{0160}"; |
230
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_CARON = "\u{0161}"; |
231
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_CEDILLA = "\u{0162}"; |
232
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_CEDILLA = "\u{0163}"; |
233
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_CARON = "\u{0164}"; |
234
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_CARON = "\u{0165}"; |
235
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_STROKE = "\u{0166}"; |
236
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_STROKE = "\u{0167}"; |
237
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_TILDE = "\u{0168}"; |
238
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_TILDE = "\u{0169}"; |
239
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_MACRON = "\u{016A}"; |
240
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_MACRON = "\u{016B}"; |
241
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_BREVE = "\u{016C}"; |
242
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_BREVE = "\u{016D}"; |
243
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE = "\u{016E}"; |
244
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_RING_ABOVE = "\u{016F}"; |
245
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE = "\u{0170}"; |
246
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE = "\u{0171}"; |
247
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_OGONEK = "\u{0172}"; |
248
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_OGONEK = "\u{0173}"; |
249
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX = "\u{0174}"; |
250
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX = "\u{0175}"; |
251
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX = "\u{0176}"; |
252
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX = "\u{0177}"; |
253
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS = "\u{0178}"; |
254
|
|
|
public const LATIN_CAPITAL_LETTER_Z_WITH_ACUTE = "\u{0179}"; |
255
|
|
|
public const LATIN_SMALL_LETTER_Z_WITH_ACUTE = "\u{017A}"; |
256
|
|
|
public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE = "\u{017B}"; |
257
|
|
|
public const LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE = "\u{017C}"; |
258
|
|
|
public const LATIN_CAPITAL_LETTER_Z_WITH_CARON = "\u{017D}"; |
259
|
|
|
public const LATIN_SMALL_LETTER_Z_WITH_CARON = "\u{017E}"; |
260
|
|
|
public const LATIN_SMALL_LETTER_LONG_S = "\u{017F}"; |
261
|
|
|
public const LATIN_SMALL_LETTER_B_WITH_STROKE = "\u{0180}"; |
262
|
|
|
public const LATIN_CAPITAL_LETTER_B_WITH_HOOK = "\u{0181}"; |
263
|
|
|
public const LATIN_CAPITAL_LETTER_B_WITH_TOPBAR = "\u{0182}"; |
264
|
|
|
public const LATIN_SMALL_LETTER_B_WITH_TOPBAR = "\u{0183}"; |
265
|
|
|
public const LATIN_CAPITAL_LETTER_F_WITH_HOOK = "\u{0191}"; |
266
|
|
|
public const LATIN_SMALL_LETTER_F_WITH_HOOK = "\u{0192}"; |
267
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_HORN = "\u{01A1}"; |
268
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_HORN = "\u{01A0}"; |
269
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_HORN = "\u{01AF}"; |
270
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_HORN = "\u{01B0}"; |
271
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CARON = "\u{01CD}"; |
272
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CARON = "\u{01CE}"; |
273
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_CARON = "\u{01CF}"; |
274
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_CARON = "\u{01D0}"; |
275
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CARON = "\u{01D1}"; |
276
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CARON = "\u{01D2}"; |
277
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_CARON = "\u{01D3}"; |
278
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_CARON = "\u{01D4}"; |
279
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON = "\u{01D5}"; |
280
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON = "\u{01D6}"; |
281
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE = "\u{01D7}"; |
282
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE = "\u{01D8}"; |
283
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON = "\u{01D9}"; |
284
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON = "\u{01DA}"; |
285
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE = "\u{01DB}"; |
286
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE = "\u{01DC}"; |
287
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON = "\u{01DE}"; |
288
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON = "\u{01DF}"; |
289
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON = "\u{01E0}"; |
290
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON = "\u{01E1}"; |
291
|
|
|
public const LATIN_CAPITAL_LETTER_AE_WITH_MACRON = "\u{01E2}"; |
292
|
|
|
public const LATIN_SMALL_LETTER_AE_WITH_MACRON = "\u{01E3}"; |
293
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_CARON = "\u{01E6}"; |
294
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_CARON = "\u{01E7}"; |
295
|
|
|
public const LATIN_CAPITAL_LETTER_K_WITH_CARON = "\u{01E8}"; |
296
|
|
|
public const LATIN_SMALL_LETTER_K_WITH_CARON = "\u{01E9}"; |
297
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK = "\u{01EA}"; |
298
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_OGONEK = "\u{01EB}"; |
299
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON = "\u{01EC}"; |
300
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON = "\u{01ED}"; |
301
|
|
|
public const LATIN_SMALL_LETTER_J_WITH_CARON = "\u{01F0}"; |
302
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_ACUTE = "\u{01F4}"; |
303
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_ACUTE = "\u{01F5}"; |
304
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_GRAVE = "\u{01F8}"; |
305
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_GRAVE = "\u{01F9}"; |
306
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE = "\u{01FA}"; |
307
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE = "\u{01FB}"; |
308
|
|
|
public const LATIN_CAPITAL_LETTER_AE_WITH_ACUTE = "\u{01FC}"; |
309
|
|
|
public const LATIN_SMALL_LETTER_AE_WITH_ACUTE = "\u{01FD}"; |
310
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE = "\u{01FE}"; |
311
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE = "\u{01FF}"; |
312
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW = "\u{0218}"; |
313
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW = "\u{0219}"; |
314
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW = "\u{021A}"; |
315
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW = "\u{021B}"; |
316
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_CARON = "\u{021E}"; |
317
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_CARON = "\u{021F}"; |
318
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE = "\u{0226}"; |
319
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE = "\u{0227}"; |
320
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA = "\u{0228}"; |
321
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CEDILLA = "\u{0229}"; |
322
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON = "\u{022A}"; |
323
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON = "\u{022B}"; |
324
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON = "\u{022C}"; |
325
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON = "\u{022D}"; |
326
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE = "\u{022E}"; |
327
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE = "\u{022F}"; |
328
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON = "\u{0230}"; |
329
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON = "\u{0231}"; |
330
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_MACRON = "\u{0232}"; |
331
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_MACRON = "\u{0233}"; |
332
|
|
|
public const MODIFIER_LETTER_PRIME = "\u{02B9}"; |
333
|
|
|
public const MODIFIER_LETTER_DOUBLE_PRIME = "\u{02BA}"; |
334
|
|
|
public const MODIFIER_LETTER_TURNED_COMMA = "\u{02BB}"; |
335
|
|
|
public const MODIFIER_LETTER_APOSTROPHE = "\u{02BC}"; |
336
|
|
|
public const MODIFIER_LETTER_CIRCUMFLEX_ACCENT = "\u{02C6}"; |
337
|
|
|
public const CARON = "\u{02C7}"; |
338
|
|
|
public const BREVE = "\u{02D8}"; |
339
|
|
|
public const DOT_ABOVE = "\u{02D9}"; |
340
|
|
|
public const RING_ABOVE = "\u{02DA}"; |
341
|
|
|
public const OGONEK = "\u{02DB}"; |
342
|
|
|
public const SMALL_TILDE = "\u{02DC}"; |
343
|
|
|
public const DOUBLE_ACUTE_ACCENT = "\u{02DD}"; |
344
|
|
|
public const COMBINING_GRAVE_ACCENT = "\u{0300}"; |
345
|
|
|
public const COMBINING_ACUTE_ACCENT = "\u{0301}"; |
346
|
|
|
public const COMBINING_CIRCUMFLEX_ACCENT = "\u{0302}"; |
347
|
|
|
public const COMBINING_TILDE = "\u{0303}"; |
348
|
|
|
public const COMBINING_MACRON = "\u{0304}"; |
349
|
|
|
public const COMBINING_OVERLINE = "\u{0305}"; |
350
|
|
|
public const COMBINING_BREVE = "\u{0306}"; |
351
|
|
|
public const COMBINING_DOT_ABOVE = "\u{0307}"; |
352
|
|
|
public const COMBINING_DIAERESIS = "\u{0308}"; |
353
|
|
|
public const COMBINING_HOOK_ABOVE = "\u{0309}"; |
354
|
|
|
public const COMBINING_RING_ABOVE = "\u{030A}"; |
355
|
|
|
public const COMBINING_DOUBLE_ACUTE_ACCENT = "\u{030B}"; |
356
|
|
|
public const COMBINING_CARON = "\u{030C}"; |
357
|
|
|
public const COMBINING_CANDRABINDU = "\u{0310}"; |
358
|
|
|
public const COMBINING_COMMA_ABOVE = "\u{0313}"; |
359
|
|
|
public const COMBINING_COMMA_ABOVE_RIGHT = "\u{0315}"; |
360
|
|
|
public const COMBINING_HORN = "\u{031B}"; |
361
|
|
|
public const COMBINING_LEFT_HALF_RING_BELOW = "\u{031C}"; |
362
|
|
|
public const COMBINING_DOT_BELOW = "\u{0323}"; |
363
|
|
|
public const COMBINING_DIAERESIS_BELOW = "\u{0324}"; |
364
|
|
|
public const COMBINING_RING_BELOW = "\u{0325}"; |
365
|
|
|
public const COMBINING_COMMA_BELOW = "\u{0326}"; |
366
|
|
|
public const COMBINING_CEDILLA = "\u{0327}"; |
367
|
|
|
public const COMBINING_OGONEK = "\u{0328}"; |
368
|
|
|
public const COMBINING_BRIDGE_BELOW = "\u{032A}"; |
369
|
|
|
public const COMBINING_BREVE_BELOW = "\u{032E}"; |
370
|
|
|
public const COMBINING_LOW_LINE = "\u{0332}"; |
371
|
|
|
public const COMBINING_DOUBLE_LOW_LINE = "\u{0333}"; |
372
|
|
|
public const COMBINING_LONG_SOLIDUS_OVERLAY = "\u{0338}"; |
373
|
|
|
public const COMBINING_DOUBLE_TILDE = "\u{0360}"; |
374
|
|
|
public const COMBINING_DOUBLE_INVERTED_BREVE = "\u{0361}"; |
375
|
|
|
public const GREEK_CAPITAL_LETTER_GAMMA = "\u{0393}"; |
376
|
|
|
public const GREEK_CAPITAL_LETTER_THETA = "\u{0398}"; |
377
|
|
|
public const GREEK_CAPITAL_LETTER_SIGMA = "\u{03A3}"; |
378
|
|
|
public const GREEK_CAPITAL_LETTER_PHI = "\u{03A6}"; |
379
|
|
|
public const GREEK_CAPITAL_LETTER_OMEGA = "\u{03A9}"; |
380
|
|
|
public const GREEK_SMALL_LETTER_ALPHA = "\u{03B1}"; |
381
|
|
|
public const GREEK_SMALL_LETTER_DELTA = "\u{03B4}"; |
382
|
|
|
public const GREEK_SMALL_LETTER_EPSILON = "\u{03B5}"; |
383
|
|
|
public const GREEK_SMALL_LETTER_PI = "\u{03C0}"; |
384
|
|
|
public const GREEK_SMALL_LETTER_SIGMA = "\u{03C3}"; |
385
|
|
|
public const GREEK_SMALL_LETTER_TAU = "\u{03C4}"; |
386
|
|
|
public const GREEK_SMALL_LETTER_PHI = "\u{03C6}"; |
387
|
|
|
public const CYRILLIC_CAPITAL_LETTER_IO = "\u{0401}"; |
388
|
|
|
public const CYRILLIC_CAPITAL_LETTER_DJE = "\u{0402}"; |
389
|
|
|
public const CYRILLIC_CAPITAL_LETTER_GJE = "\u{0403}"; |
390
|
|
|
// NOTE(review): "UKRANIAN" looks like a misspelling of "UKRAINIAN" (U+0404 is CYRILLIC CAPITAL LETTER UKRAINIAN IE). The name is kept unchanged because it is a public constant that other code may reference - confirm usages before renaming.
public const CYRILLIC_CAPITAL_LETTER_UKRANIAN_IE = "\u{0404}"; |
391
|
|
|
public const CYRILLIC_CAPITAL_LETTER_DZE = "\u{0405}"; |
392
|
|
|
public const CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I = "\u{0406}"; |
393
|
|
|
public const CYRILLIC_CAPITAL_LETTER_YI = "\u{0407}"; |
394
|
|
|
public const CYRILLIC_CAPITAL_LETTER_JE = "\u{0408}"; |
395
|
|
|
public const CYRILLIC_CAPITAL_LETTER_LJE = "\u{0409}"; |
396
|
|
|
public const CYRILLIC_CAPITAL_LETTER_NJE = "\u{040A}"; |
397
|
|
|
public const CYRILLIC_CAPITAL_LETTER_TSHE = "\u{040B}"; |
398
|
|
|
public const CYRILLIC_CAPITAL_LETTER_KJE = "\u{040C}"; |
399
|
|
|
public const CYRILLIC_CAPITAL_LETTER_SHORT_U = "\u{040E}"; |
400
|
|
|
public const CYRILLIC_CAPITAL_LETTER_DZHE = "\u{040F}"; |
401
|
|
|
public const CYRILLIC_CAPITAL_LETTER_A = "\u{0410}"; |
402
|
|
|
public const CYRILLIC_CAPITAL_LETTER_BE = "\u{0411}"; |
403
|
|
|
public const CYRILLIC_CAPITAL_LETTER_VE = "\u{0412}"; |
404
|
|
|
public const CYRILLIC_CAPITAL_LETTER_GHE = "\u{0413}"; |
405
|
|
|
public const CYRILLIC_CAPITAL_LETTER_DE = "\u{0414}"; |
406
|
|
|
public const CYRILLIC_CAPITAL_LETTER_IE = "\u{0415}"; |
407
|
|
|
public const CYRILLIC_CAPITAL_LETTER_ZHE = "\u{0416}"; |
408
|
|
|
public const CYRILLIC_CAPITAL_LETTER_ZE = "\u{0417}"; |
409
|
|
|
public const CYRILLIC_CAPITAL_LETTER_I = "\u{0418}"; |
410
|
|
|
public const CYRILLIC_CAPITAL_LETTER_SHORT_I = "\u{0419}"; |
411
|
|
|
public const CYRILLIC_CAPITAL_LETTER_KA = "\u{041A}"; |
412
|
|
|
public const CYRILLIC_CAPITAL_LETTER_EL = "\u{041B}"; |
413
|
|
|
public const CYRILLIC_CAPITAL_LETTER_EM = "\u{041C}"; |
414
|
|
|
public const CYRILLIC_CAPITAL_LETTER_EN = "\u{041D}"; |
415
|
|
|
public const CYRILLIC_CAPITAL_LETTER_O = "\u{041E}"; |
416
|
|
|
public const CYRILLIC_CAPITAL_LETTER_PE = "\u{041F}"; |
417
|
|
|
public const CYRILLIC_CAPITAL_LETTER_ER = "\u{0420}"; |
418
|
|
|
public const CYRILLIC_CAPITAL_LETTER_ES = "\u{0421}"; |
419
|
|
|
public const CYRILLIC_CAPITAL_LETTER_TE = "\u{0422}"; |
420
|
|
|
public const CYRILLIC_CAPITAL_LETTER_U = "\u{0423}"; |
421
|
|
|
public const CYRILLIC_CAPITAL_LETTER_EF = "\u{0424}"; |
422
|
|
|
public const CYRILLIC_CAPITAL_LETTER_HA = "\u{0425}"; |
423
|
|
|
public const CYRILLIC_CAPITAL_LETTER_TSE = "\u{0426}"; |
424
|
|
|
public const CYRILLIC_CAPITAL_LETTER_CHE = "\u{0427}"; |
425
|
|
|
public const CYRILLIC_CAPITAL_LETTER_SHA = "\u{0428}"; |
426
|
|
|
public const CYRILLIC_CAPITAL_LETTER_SHCHA = "\u{0429}"; |
427
|
|
|
public const CYRILLIC_CAPITAL_LETTER_HARD_SIGN = "\u{042A}"; |
428
|
|
|
public const CYRILLIC_CAPITAL_LETTER_YERU = "\u{042B}"; |
429
|
|
|
public const CYRILLIC_CAPITAL_LETTER_SOFT_SIGN = "\u{042C}"; |
430
|
|
|
public const CYRILLIC_CAPITAL_LETTER_E = "\u{042D}"; |
431
|
|
|
public const CYRILLIC_CAPITAL_LETTER_YU = "\u{042E}"; |
432
|
|
|
public const CYRILLIC_CAPITAL_LETTER_YA = "\u{042F}"; |
433
|
|
|
public const CYRILLIC_SMALL_LETTER_A = "\u{0430}"; |
434
|
|
|
public const CYRILLIC_SMALL_LETTER_BE = "\u{0431}"; |
435
|
|
|
public const CYRILLIC_SMALL_LETTER_VE = "\u{0432}"; |
436
|
|
|
public const CYRILLIC_SMALL_LETTER_GHE = "\u{0433}"; |
437
|
|
|
public const CYRILLIC_SMALL_LETTER_DE = "\u{0434}"; |
438
|
|
|
public const CYRILLIC_SMALL_LETTER_IE = "\u{0435}"; |
439
|
|
|
public const CYRILLIC_SMALL_LETTER_ZHE = "\u{0436}"; |
440
|
|
|
public const CYRILLIC_SMALL_LETTER_ZE = "\u{0437}"; |
441
|
|
|
public const CYRILLIC_SMALL_LETTER_I = "\u{0438}"; |
442
|
|
|
public const CYRILLIC_SMALL_LETTER_SHORT_I = "\u{0439}"; |
443
|
|
|
public const CYRILLIC_SMALL_LETTER_KA = "\u{043A}"; |
444
|
|
|
public const CYRILLIC_SMALL_LETTER_EL = "\u{043B}"; |
445
|
|
|
public const CYRILLIC_SMALL_LETTER_EM = "\u{043C}"; |
446
|
|
|
public const CYRILLIC_SMALL_LETTER_EN = "\u{043D}"; |
447
|
|
|
public const CYRILLIC_SMALL_LETTER_O = "\u{043E}"; |
448
|
|
|
public const CYRILLIC_SMALL_LETTER_PE = "\u{043F}"; |
449
|
|
|
public const CYRILLIC_SMALL_LETTER_ER = "\u{0440}"; |
450
|
|
|
public const CYRILLIC_SMALL_LETTER_ES = "\u{0441}"; |
451
|
|
|
public const CYRILLIC_SMALL_LETTER_TE = "\u{0442}"; |
452
|
|
|
public const CYRILLIC_SMALL_LETTER_U = "\u{0443}"; |
453
|
|
|
public const CYRILLIC_SMALL_LETTER_EF = "\u{0444}"; |
454
|
|
|
public const CYRILLIC_SMALL_LETTER_HA = "\u{0445}"; |
455
|
|
|
public const CYRILLIC_SMALL_LETTER_TSE = "\u{0446}"; |
456
|
|
|
public const CYRILLIC_SMALL_LETTER_CHE = "\u{0447}"; |
457
|
|
|
public const CYRILLIC_SMALL_LETTER_SHA = "\u{0448}"; |
458
|
|
|
public const CYRILLIC_SMALL_LETTER_SHCHA = "\u{0449}"; |
459
|
|
|
public const CYRILLIC_SMALL_LETTER_HARD_SIGN = "\u{044A}"; |
460
|
|
|
public const CYRILLIC_SMALL_LETTER_YERU = "\u{044B}"; |
461
|
|
|
public const CYRILLIC_SMALL_LETTER_SOFT_SIGN = "\u{044C}"; |
462
|
|
|
public const CYRILLIC_SMALL_LETTER_E = "\u{044D}"; |
463
|
|
|
public const CYRILLIC_SMALL_LETTER_YU = "\u{044E}"; |
464
|
|
|
public const CYRILLIC_SMALL_LETTER_YA = "\u{044F}"; |
465
|
|
|
public const CYRILLIC_SMALL_LETTER_IO = "\u{0451}"; |
466
|
|
|
public const CYRILLIC_SMALL_LETTER_DJE = "\u{0452}"; |
467
|
|
|
public const CYRILLIC_SMALL_LETTER_GJE = "\u{0453}"; |
468
|
|
|
// NOTE(review): "UKRANIAN" looks like a misspelling of "UKRAINIAN" (U+0454 is CYRILLIC SMALL LETTER UKRAINIAN IE). The name is kept unchanged because it is a public constant that other code may reference - confirm usages before renaming.
public const CYRILLIC_SMALL_LETTER_UKRANIAN_IE = "\u{0454}"; |
469
|
|
|
public const CYRILLIC_SMALL_LETTER_DZE = "\u{0455}"; |
470
|
|
|
public const CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I = "\u{0456}"; |
471
|
|
|
public const CYRILLIC_SMALL_LETTER_YI = "\u{0457}"; |
472
|
|
|
public const CYRILLIC_SMALL_LETTER_JE = "\u{0458}"; |
473
|
|
|
public const CYRILLIC_SMALL_LETTER_LJE = "\u{0459}"; |
474
|
|
|
public const CYRILLIC_SMALL_LETTER_NJE = "\u{045A}"; |
475
|
|
|
public const CYRILLIC_SMALL_LETTER_TSHE = "\u{045B}"; |
476
|
|
|
public const CYRILLIC_SMALL_LETTER_KJE = "\u{045C}"; |
477
|
|
|
public const CYRILLIC_SMALL_LETTER_SHORT_U = "\u{045E}"; |
478
|
|
|
public const CYRILLIC_SMALL_LETTER_DZHE = "\u{045F}"; |
479
|
|
|
public const CYRILLIC_CAPITAL_LETTER_GHE_WITH_UPTURN = "\u{0490}"; |
480
|
|
|
public const CYRILLIC_SMALL_LETTER_GHE_WITH_UPTURN = "\u{0491}"; |
481
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW = "\u{1E00}"; |
482
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_RING_BELOW = "\u{1E01}"; |
483
|
|
|
public const LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE = "\u{1E02}"; |
484
|
|
|
public const LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE = "\u{1E03}"; |
485
|
|
|
public const LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW = "\u{1E04}"; |
486
|
|
|
public const LATIN_SMALL_LETTER_B_WITH_DOT_BELOW = "\u{1E05}"; |
487
|
|
|
public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE = "\u{1E08}"; |
488
|
|
|
public const LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE = "\u{1E09}"; |
489
|
|
|
public const LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE = "\u{1E0A}"; |
490
|
|
|
public const LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE = "\u{1E0B}"; |
491
|
|
|
public const LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW = "\u{1E0C}"; |
492
|
|
|
public const LATIN_SMALL_LETTER_D_WITH_DOT_BELOW = "\u{1E0D}"; |
493
|
|
|
public const LATIN_CAPITAL_LETTER_SHARP_S = "\u{1E9E}"; |
494
|
|
|
public const LATIN_CAPITAL_LETTER_D_WITH_CEDILLA = "\u{1E10}"; |
495
|
|
|
public const LATIN_SMALL_LETTER_D_WITH_CEDILLA = "\u{1E11}"; |
496
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE = "\u{1E14}"; |
497
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE = "\u{1E15}"; |
498
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE = "\u{1E16}"; |
499
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE = "\u{1E17}"; |
500
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE = "\u{1E1C}"; |
501
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE = "\u{1E1D}"; |
502
|
|
|
public const LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE = "\u{1E1E}"; |
503
|
|
|
public const LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE = "\u{1E1F}"; |
504
|
|
|
public const LATIN_CAPITAL_LETTER_G_WITH_MACRON = "\u{1E20}"; |
505
|
|
|
public const LATIN_SMALL_LETTER_G_WITH_MACRON = "\u{1E21}"; |
506
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE = "\u{1E22}"; |
507
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE = "\u{1E23}"; |
508
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW = "\u{1E24}"; |
509
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_DOT_BELOW = "\u{1E25}"; |
510
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS = "\u{1E26}"; |
511
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_DIAERESIS = "\u{1E27}"; |
512
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_CEDILLA = "\u{1E28}"; |
513
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_CEDILLA = "\u{1E29}"; |
514
|
|
|
public const LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW = "\u{1E2A}"; |
515
|
|
|
public const LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW = "\u{1E2B}"; |
516
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE = "\u{1E2E}"; |
517
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE = "\u{1E2F}"; |
518
|
|
|
public const LATIN_CAPITAL_LETTER_K_WITH_ACUTE = "\u{1E30}"; |
519
|
|
|
public const LATIN_SMALL_LETTER_K_WITH_ACUTE = "\u{1E31}"; |
520
|
|
|
public const LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW = "\u{1E32}"; |
521
|
|
|
public const LATIN_SMALL_LETTER_K_WITH_DOT_BELOW = "\u{1E33}"; |
522
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW = "\u{1E36}"; |
523
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW = "\u{1E37}"; |
524
|
|
|
public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON = "\u{1E38}"; |
525
|
|
|
public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON = "\u{1E39}"; |
526
|
|
|
public const LATIN_CAPITAL_LETTER_M_WITH_ACUTE = "\u{1E3E}"; |
527
|
|
|
public const LATIN_SMALL_LETTER_M_WITH_ACUTE = "\u{1E3F}"; |
528
|
|
|
public const LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE = "\u{1E40}"; |
529
|
|
|
public const LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE = "\u{1E41}"; |
530
|
|
|
public const LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW = "\u{1E42}"; |
531
|
|
|
public const LATIN_SMALL_LETTER_M_WITH_DOT_BELOW = "\u{1E43}"; |
532
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE = "\u{1E44}"; |
533
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE = "\u{1E45}"; |
534
|
|
|
public const LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW = "\u{1E46}"; |
535
|
|
|
public const LATIN_SMALL_LETTER_N_WITH_DOT_BELOW = "\u{1E47}"; |
536
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE = "\u{1E4C}"; |
537
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE = "\u{1E4D}"; |
538
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS = "\u{1E4E}"; |
539
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS = "\u{1E4F}"; |
540
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE = "\u{1E50}"; |
541
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE = "\u{1E51}"; |
542
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE = "\u{1E52}"; |
543
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE = "\u{1E53}"; |
544
|
|
|
public const LATIN_CAPITAL_LETTER_P_WITH_ACUTE = "\u{1E54}"; |
545
|
|
|
public const LATIN_SMALL_LETTER_P_WITH_ACUTE = "\u{1E55}"; |
546
|
|
|
public const LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE = "\u{1E56}"; |
547
|
|
|
public const LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE = "\u{1E57}"; |
548
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE = "\u{1E58}"; |
549
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE = "\u{1E59}"; |
550
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW = "\u{1E5A}"; |
551
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW = "\u{1E5B}"; |
552
|
|
|
public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON = "\u{1E5C}"; |
553
|
|
|
public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON = "\u{1E5D}"; |
554
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE = "\u{1E60}"; |
555
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE = "\u{1E61}"; |
556
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW = "\u{1E62}"; |
557
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW = "\u{1E63}"; |
558
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE = "\u{1E64}"; |
559
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE = "\u{1E65}"; |
560
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE = "\u{1E66}"; |
561
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE = "\u{1E67}"; |
562
|
|
|
public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE = "\u{1E68}"; |
563
|
|
|
public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE = "\u{1E69}"; |
564
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE = "\u{1E6A}"; |
565
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE = "\u{1E6B}"; |
566
|
|
|
public const LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW = "\u{1E6C}"; |
567
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_DOT_BELOW = "\u{1E6D}"; |
568
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW = "\u{1E72}"; |
569
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW = "\u{1E73}"; |
570
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE = "\u{1E78}"; |
571
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE = "\u{1E79}"; |
572
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS = "\u{1E7A}"; |
573
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS = "\u{1E7B}"; |
574
|
|
|
public const LATIN_CAPITAL_LETTER_V_WITH_TILDE = "\u{1E7C}"; |
575
|
|
|
public const LATIN_SMALL_LETTER_V_WITH_TILDE = "\u{1E7D}"; |
576
|
|
|
public const LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW = "\u{1E7E}"; |
577
|
|
|
public const LATIN_SMALL_LETTER_V_WITH_DOT_BELOW = "\u{1E7F}"; |
578
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_GRAVE = "\u{1E80}"; |
579
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_GRAVE = "\u{1E81}"; |
580
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_ACUTE = "\u{1E82}"; |
581
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_ACUTE = "\u{1E83}"; |
582
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS = "\u{1E84}"; |
583
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_DIAERESIS = "\u{1E85}"; |
584
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE = "\u{1E86}"; |
585
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE = "\u{1E87}"; |
586
|
|
|
public const LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW = "\u{1E88}"; |
587
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_DOT_BELOW = "\u{1E89}"; |
588
|
|
|
public const LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE = "\u{1E8A}"; |
589
|
|
|
public const LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE = "\u{1E8B}"; |
590
|
|
|
public const LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS = "\u{1E8C}"; |
591
|
|
|
public const LATIN_SMALL_LETTER_X_WITH_DIAERESIS = "\u{1E8D}"; |
592
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE = "\u{1E8E}"; |
593
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE = "\u{1E8F}"; |
594
|
|
|
public const LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX = "\u{1E90}"; |
595
|
|
|
public const LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX = "\u{1E91}"; |
596
|
|
|
public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW = "\u{1E92}"; |
597
|
|
|
public const LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW = "\u{1E93}"; |
598
|
|
|
public const LATIN_SMALL_LETTER_T_WITH_DIAERESIS = "\u{1E97}"; |
599
|
|
|
public const LATIN_SMALL_LETTER_W_WITH_RING_ABOVE = "\u{1E98}"; |
600
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE = "\u{1E99}"; |
601
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW = "\u{1EA0}"; |
602
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_DOT_BELOW = "\u{1EA1}"; |
603
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE = "\u{1EA2}"; |
604
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE = "\u{1EA3}"; |
605
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1EA4}"; |
606
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1EA5}"; |
607
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1EA6}"; |
608
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1EA7}"; |
609
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EA8}"; |
610
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EA9}"; |
611
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE = "\u{1EAA}"; |
612
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE = "\u{1EAB}"; |
613
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1EAC}"; |
614
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1EAD}"; |
615
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE = "\u{1EAE}"; |
616
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE = "\u{1EAF}"; |
617
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE = "\u{1EB0}"; |
618
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE = "\u{1EB1}"; |
619
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE = "\u{1EB2}"; |
620
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE = "\u{1EB3}"; |
621
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE = "\u{1EB4}"; |
622
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE = "\u{1EB5}"; |
623
|
|
|
public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW = "\u{1EB6}"; |
624
|
|
|
public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW = "\u{1EB7}"; |
625
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW = "\u{1EB8}"; |
626
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_DOT_BELOW = "\u{1EB9}"; |
627
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE = "\u{1EBA}"; |
628
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE = "\u{1EBB}"; |
629
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_TILDE = "\u{1EBC}"; |
630
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_TILDE = "\u{1EBD}"; |
631
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1EBE}"; |
632
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1EBF}"; |
633
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1EC0}"; |
634
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1EC1}"; |
635
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EC2}"; |
636
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EC3}"; |
637
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE = "\u{1EC4}"; |
638
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE = "\u{1EC5}"; |
639
|
|
|
public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1EC6}"; |
640
|
|
|
public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1EC7}"; |
641
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE = "\u{1EC8}"; |
642
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE = "\u{1EC9}"; |
643
|
|
|
public const LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW = "\u{1ECA}"; |
644
|
|
|
public const LATIN_SMALL_LETTER_I_WITH_DOT_BELOW = "\u{1ECB}"; |
645
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW = "\u{1ECC}"; |
646
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_DOT_BELOW = "\u{1ECD}"; |
647
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE = "\u{1ECE}"; |
648
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE = "\u{1ECF}"; |
649
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1ED0}"; |
650
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE = "\u{1ED1}"; |
651
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1ED2}"; |
652
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE = "\u{1ED3}"; |
653
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1ED4}"; |
654
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1ED5}"; |
655
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE = "\u{1ED6}"; |
656
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE = "\u{1ED7}"; |
657
|
|
|
public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1ED8}"; |
658
|
|
|
public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW = "\u{1ED9}"; |
659
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW = "\u{1EE4}"; |
660
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_DOT_BELOW = "\u{1EE5}"; |
661
|
|
|
public const LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE = "\u{1EE6}"; |
662
|
|
|
public const LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE = "\u{1EE7}"; |
663
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_GRAVE = "\u{1EF2}"; |
664
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_GRAVE = "\u{1EF3}"; |
665
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW = "\u{1EF4}"; |
666
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW = "\u{1EF5}"; |
667
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE = "\u{1EF6}"; |
668
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE = "\u{1EF7}"; |
669
|
|
|
public const LATIN_CAPITAL_LETTER_Y_WITH_TILDE = "\u{1EF8}"; |
670
|
|
|
public const LATIN_SMALL_LETTER_Y_WITH_TILDE = "\u{1EF9}"; |
671
|
|
|
public const ZERO_WIDTH_NON_JOINER = "\u{200C}"; |
672
|
|
|
public const ZERO_WIDTH_JOINER = "\u{200D}"; |
673
|
|
|
public const EN_DASH = "\u{2013}"; |
674
|
|
|
public const EM_DASH = "\u{2014}"; |
675
|
|
|
public const DOUBLE_LOW_LINE = "\u{2017}"; |
676
|
|
|
public const LEFT_SINGLE_QUOTATION_MARK = "\u{2018}"; |
677
|
|
|
public const RIGHT_SINGLE_QUOTATION_MARK = "\u{2019}"; |
678
|
|
|
public const SINGLE_LOW_9_QUOTATION_MARK = "\u{201A}"; |
679
|
|
|
public const LEFT_DOUBLE_QUOTATION_MARK = "\u{201C}"; |
680
|
|
|
public const RIGHT_DOUBLE_QUOTATION_MARK = "\u{201D}"; |
681
|
|
|
public const DOUBLE_LOW_9_QUOTATION_MARK = "\u{201E}"; |
682
|
|
|
public const DAGGER = "\u{2020}"; |
683
|
|
|
public const DOUBLE_DAGGER = "\u{2021}"; |
684
|
|
|
public const BULLET = "\u{2022}"; |
685
|
|
|
public const HORIZONTAL_ELLIPSIS = "\u{2026}"; |
686
|
|
|
public const PER_MILLE_SIGN = "\u{2030}"; |
687
|
|
|
public const SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK = "\u{2039}"; |
688
|
|
|
public const SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK = "\u{203A}"; |
689
|
|
|
public const FRACTION_SLASH = "\u{2044}"; |
690
|
|
|
public const SUPERSCRIPT_LATIN_SMALL_LETTER_N = "\u{207F}"; |
691
|
|
|
public const PESETA_SIGN = "\u{20A7}"; |
692
|
|
|
public const EURO_SIGN = "\u{20AC}"; |
693
|
|
|
public const SCRIPT_SMALL_L = "\u{2113}"; |
694
|
|
|
public const NUMERO_SIGN = "\u{2116}"; |
695
|
|
|
public const SOUND_RECORDING_COPYRIGHT = "\u{2117}"; |
696
|
|
|
public const TRADE_MARK_SIGN = "\u{2122}"; |
697
|
|
|
public const PARTIAL_DIFFERENTIAL = "\u{2202}"; |
698
|
|
|
public const INCREMENT = "\u{2206}"; |
699
|
|
|
public const N_ARY_PRODUCT = "\u{220F}"; |
700
|
|
|
public const N_ARY_SUMMATION = "\u{2211}"; |
701
|
|
|
public const BULLET_OPERATOR = "\u{2219}"; |
702
|
|
|
public const SQUARE_ROOT = "\u{221A}"; |
703
|
|
|
public const INFINITY = "\u{221E}"; |
704
|
|
|
public const INTERSECTION = "\u{2229}"; |
705
|
|
|
public const INTEGRAL = "\u{222B}"; |
706
|
|
|
public const ALMOST_EQUAL_TO = "\u{2248}"; |
707
|
|
|
public const NOT_EQUAL_TO = "\u{2260}"; |
708
|
|
|
public const IDENTICAL_TO = "\u{2261}"; |
709
|
|
|
public const LESS_THAN_OR_EQUAL_TO = "\u{2264}"; |
710
|
|
|
public const GREATER_THAN_OR_EQUAL_TO = "\u{2265}"; |
711
|
|
|
public const REVERSED_NOT_SIGN = "\u{2310}"; |
712
|
|
|
public const TOP_HALF_INTEGRAL = "\u{2320}"; |
713
|
|
|
public const BOTTOM_HALF_INTEGRAL = "\u{2321}"; |
714
|
|
|
public const BOX_DRAWINGS_LIGHT_HORIZONTAL = "\u{2500}"; |
715
|
|
|
public const BOX_DRAWINGS_LIGHT_VERTICAL = "\u{2502}"; |
716
|
|
|
public const BOX_DRAWINGS_LIGHT_DOWN_AND_RIGHT = "\u{250C}"; |
717
|
|
|
public const BOX_DRAWINGS_LIGHT_DOWN_AND_LEFT = "\u{2510}"; |
718
|
|
|
public const BOX_DRAWINGS_LIGHT_UP_AND_LEFT = "\u{2518}"; |
719
|
|
|
public const BOX_DRAWINGS_LIGHT_UP_AND_RIGHT = "\u{2514}"; |
720
|
|
|
public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_RIGHT = "\u{251C}"; |
721
|
|
|
public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_LEFT = "\u{2524}"; |
722
|
|
|
public const BOX_DRAWINGS_LIGHT_DOWN_AND_HORIZONTAL = "\u{252C}"; |
723
|
|
|
public const BOX_DRAWINGS_LIGHT_UP_AND_HORIZONTAL = "\u{2534}"; |
724
|
|
|
public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_HORIZONTAL = "\u{253C}"; |
725
|
|
|
public const BOX_DRAWINGS_DOUBLE_HORIZONTAL = "\u{2550}"; |
726
|
|
|
public const BOX_DRAWINGS_DOUBLE_VERTICAL = "\u{2551}"; |
727
|
|
|
public const BOX_DRAWINGS_DOWN_SINGLE_AND_RIGHT_DOUBLE = "\u{2552}"; |
728
|
|
|
public const BOX_DRAWINGS_DOWN_DOUBLE_AND_RIGHT_SINGLE = "\u{2553}"; |
729
|
|
|
public const BOX_DRAWINGS_DOUBLE_DOWN_AND_RIGHT = "\u{2554}"; |
730
|
|
|
public const BOX_DRAWINGS_DOWN_SINGLE_AND_LEFT_DOUBLE = "\u{2555}"; |
731
|
|
|
public const BOX_DRAWINGS_DOWN_DOUBLE_AND_LEFT_SINGLE = "\u{2556}"; |
732
|
|
|
public const BOX_DRAWINGS_DOUBLE_DOWN_AND_LEFT = "\u{2557}"; |
733
|
|
|
public const BOX_DRAWINGS_UP_SINGLE_AND_RIGHT_DOUBLE = "\u{2558}"; |
734
|
|
|
public const BOX_DRAWINGS_UP_DOUBLE_AND_RIGHT_SINGLE = "\u{2559}"; |
735
|
|
|
public const BOX_DRAWINGS_DOUBLE_UP_AND_RIGHT = "\u{255A}"; |
736
|
|
|
public const BOX_DRAWINGS_UP_SINGLE_AND_LEFT_DOUBLE = "\u{255B}"; |
737
|
|
|
public const BOX_DRAWINGS_UP_DOUBLE_AND_LEFT_SINGLE = "\u{255C}"; |
738
|
|
|
public const BOX_DRAWINGS_DOUBLE_UP_AND_LEFT = "\u{255D}"; |
739
|
|
|
public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_RIGHT_DOUBLE = "\u{255E}"; |
740
|
|
|
public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_RIGHT_SINGLE = "\u{255F}"; |
741
|
|
|
public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_RIGHT = "\u{2560}"; |
742
|
|
|
public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_LEFT_DOUBLE = "\u{2561}"; |
743
|
|
|
public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_LEFT_SINGLE = "\u{2562}"; |
744
|
|
|
public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_LEFT = "\u{2563}"; |
745
|
|
|
public const BOX_DRAWINGS_DOWN_SINGLE_AND_HORIZONTAL_DOUBLE = "\u{2564}"; |
746
|
|
|
public const BOX_DRAWINGS_DOWN_DOUBLE_AND_HORIZONTAL_SINGLE = "\u{2565}"; |
747
|
|
|
public const BOX_DRAWINGS_DOUBLE_DOWN_AND_HORIZONTAL = "\u{2566}"; |
748
|
|
|
public const BOX_DRAWINGS_UP_SINGLE_AND_HORIZONTAL_DOUBLE = "\u{2567}"; |
749
|
|
|
public const BOX_DRAWINGS_UP_DOUBLE_AND_HORIZONTAL_SINGLE = "\u{2568}"; |
750
|
|
|
// NOTE(review): the "BOX_DRAWINGS_" prefix is repeated here, unlike the neighbouring BOX_DRAWINGS_* constants
// (U+2569 is BOX DRAWINGS DOUBLE UP AND HORIZONTAL). The constant is public, so the name is left unchanged.
public const BOX_DRAWINGS_BOX_DRAWINGS_DOUBLE_UP_AND_HORIZONTAL = "\u{2569}"; |
751
|
|
|
public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_HORIZONTAL_DOUBLE = "\u{256A}"; |
752
|
|
|
public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_HORIZONTAL_SINGLE = "\u{256B}"; |
753
|
|
|
public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_HORIZONTAL = "\u{256C}"; |
754
|
|
|
public const UPPER_HALF_BLOCK = "\u{2580}"; |
755
|
|
|
public const LOWER_HALF_BLOCK = "\u{2584}"; |
756
|
|
|
public const FULL_BLOCK = "\u{2588}"; |
757
|
|
|
public const LEFT_HALF_BLOCK = "\u{258C}"; |
758
|
|
|
public const RIGHT_HALF_BLOCK = "\u{2590}"; |
759
|
|
|
public const LIGHT_SHADE = "\u{2591}"; |
760
|
|
|
public const MEDIUM_SHADE = "\u{2592}"; |
761
|
|
|
public const DARK_SHADE = "\u{2593}"; |
762
|
|
|
public const BLACK_SQUARE = "\u{25A0}"; |
763
|
|
|
public const WHITE_SQUARE = "\u{25A1}"; |
764
|
|
|
public const LOZENGE = "\u{25CA}"; |
765
|
|
|
public const MUSIC_FLAT_SIGN = "\u{266D}"; |
766
|
|
|
public const MUSIC_SHARP_SIGN = "\u{266F}"; |
767
|
|
|
public const LATIN_SMALL_LIGATURE_FI = "\u{FB01}"; |
768
|
|
|
public const LATIN_SMALL_LIGATURE_FL = "\u{FB02}"; |
769
|
|
|
public const BYTE_ORDER_MARK = "\u{FEFF}"; |
770
|
|
|
public const REPLACEMENT_CHARACTER = "\u{FFFD}"; |
771
|
|
|
|
772
|
|
|
/**
 * Map of decomposed sequences to their precomposed equivalents.
 *
 * Each key is a base ASCII letter followed by a combining diacritic; each value is the
 * single precomposed character for that pair. Only the combinations listed here are
 * covered (for example, 'a' followed by a combining acute accent has no entry).
 *
 * NOTE(review): this block does not show how the table is consumed - presumably as a
 * search/replace map when normalising text; confirm against the callers.
 */
public const COMPOSED_CHARACTERS = [ |
773
|
|
|
'A' . self::COMBINING_ACUTE_ACCENT => self::LATIN_CAPITAL_LETTER_A_WITH_ACUTE, |
774
|
|
|
'A' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, |
775
|
|
|
'A' . self::COMBINING_DIAERESIS => self::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, |
776
|
|
|
'A' . self::COMBINING_GRAVE_ACCENT => self::LATIN_CAPITAL_LETTER_A_WITH_GRAVE, |
777
|
|
|
'A' . self::COMBINING_RING_ABOVE => self::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, |
778
|
|
|
'A' . self::COMBINING_TILDE => self::LATIN_CAPITAL_LETTER_A_WITH_TILDE, |
779
|
|
|
'C' . self::COMBINING_CEDILLA => self::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, |
780
|
|
|
'E' . self::COMBINING_ACUTE_ACCENT => self::LATIN_CAPITAL_LETTER_E_WITH_ACUTE, |
781
|
|
|
'E' . self::COMBINING_DIAERESIS => self::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, |
782
|
|
|
'E' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, |
783
|
|
|
'E' . self::COMBINING_GRAVE_ACCENT => self::LATIN_CAPITAL_LETTER_E_WITH_GRAVE, |
784
|
|
|
'a' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX, |
785
|
|
|
'a' . self::COMBINING_DIAERESIS => self::LATIN_SMALL_LETTER_A_WITH_DIAERESIS, |
786
|
|
|
'e' . self::COMBINING_ACUTE_ACCENT => self::LATIN_SMALL_LETTER_E_WITH_ACUTE, |
787
|
|
|
'u' . self::COMBINING_DIAERESIS => self::LATIN_SMALL_LETTER_U_WITH_DIAERESIS, |
788
|
|
|
]; |
789
|
|
|
|
790
|
|
|
/**
 * Return valid UTF-8 for the given (possibly malformed) UTF-8 text.
 *
 * Well-formed input is returned untouched. Malformed input is re-encoded
 * from UTF-8 to UTF-8, with U+FFFD (REPLACEMENT CHARACTER) configured as the
 * mbstring substitute character beforehand.
 *
 * Note: mb_substitute_character() changes a process-wide mbstring setting,
 * and this method does not restore the previous value afterwards.
 *
 * @param string $text
 *
 * @return string
 */
public function fromUtf8(string $text): string
{
    // With the "u" modifier, preg_match() fails (returns false) when the
    // subject is not valid UTF-8, so an empty pattern works as a validity probe.
    $isWellFormed = preg_match('//u', $text) !== false;

    if ($isWellFormed) {
        return $text;
    }

    // Not UTF8?
    mb_substitute_character(0xFFFD);

    return mb_convert_encoding($text, 'UTF-8', 'UTF-8');
}
808
|
|
|
|
809
|
|
|
/**
 * Return valid UTF-8 for the given (possibly malformed) UTF-8 text.
 *
 * Source and target encodings are both UTF-8 for this class, so the work is
 * delegated to fromUtf8().
 *
 * @param string $text
 *
 * @return string
 */
public function toUtf8(string $text): string
{
    $utf8 = $this->fromUtf8($text);

    return $utf8;
}
820
|
|
|
|
821
|
|
|
/**
 * Encode a numeric code as a UTF-8 byte sequence of one to four bytes.
 *
 * Codes up to 0x7F produce one byte, up to 0x7FF two bytes, up to 0xFFFF
 * three bytes, and anything larger (up to 0x1FFFFF) four bytes.
 *
 * @param int $code
 *
 * @return string
 *
 * @throws InvalidArgumentException If $code is negative or greater than 0x1FFFFF.
 */
public static function chr(int $code): string
{
    $inRange = $code >= 0 && $code <= 0x1FFFFF;

    if (!$inRange) {
        throw new InvalidArgumentException((string) $code);
    }

    // Builds a continuation byte (10xxxxxx) from the low six bits of $bits.
    $continuation = static function (int $bits): string {
        return chr(0x80 | ($bits & 0x3F));
    };

    // One byte: 0xxxxxxx
    if ($code < 0x80) {
        return chr($code);
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if ($code < 0x800) {
        return chr(0xC0 | ($code >> 6)) . $continuation($code);
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code < 0x10000) {
        return chr(0xE0 | ($code >> 12)) . $continuation($code >> 6) . $continuation($code);
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    $lead = chr(0xF0 | ($code >> 18));

    return $lead . $continuation($code >> 12) . $continuation($code >> 6) . $continuation($code);
}
857
|
|
|
} |
858
|
|
|
|